All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] PCI: Refine broken INTx masking for Mellanox devices
@ 2016-04-19 16:33 Majd Dibbiny
  2016-04-19 19:04 ` Alex Williamson
  0 siblings, 1 reply; 13+ messages in thread
From: Majd Dibbiny @ 2016-04-19 16:33 UTC (permalink / raw)
  To: bhelgaas; +Cc: linux-pci, ogerlitz, Noa Osherovich, Majd Dibbiny

From: Noa Osherovich <noaos@mellanox.com>

Mellanox devices were marked as having INTx masking ability broken.
As a result, the VFIO driver fails to load when the device was
passed-through to a VM.

This patch excludes ConnectX-4, ConnectX4-Lx and Connect-IB from the
list of Mellanox devices marked as having broken INTx masking:

- ConnectX-4 and ConnectX4-Lx either declare INTx are not supported
  (FW versions 12.14.2036 / 14.12.2036) or support them properly
  (starting May 2016 firmware release). Users having earlier firmware
  versions will have to update to either one.
- Connect-IB does not support INTx currently so will not cause any
  problem.

Fixes: 11e42532ada31 ('PCI: Assume all Mellanox devices have ...')
Signed-off-by: Noa Osherovich <noaos@mellanox.com>
Signed-off-by: Majd Dibbiny <majd@mellanox.com>
---
 drivers/pci/quirks.c    | 61 ++++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/pci_ids.h | 14 ++++++++++++
 2 files changed, 74 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 8e67802..6a1856e 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3147,7 +3147,66 @@ DECLARE_PCI_FIXUP_HEADER(0x1814, 0x0601, /* Ralink RT2800 802.11n PCI */
  */
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_REALTEK, 0x8169,
 			 quirk_broken_intx_masking);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_ANY_ID,
+
+/*
+ * Mellanox devices that fail under PCI device assignment using DisINTx masking
+ */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_TAVOR_BRIDGE,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_ARBEL,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_SINAI,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_SINAI_OLD,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_HERMON_SDR,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_HERMON_DDR,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_HERMON_QDR,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_HERMON_EN,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_CONNECTX_EN,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_CONNECTX2,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_CONNECTX3,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO,
 			 quirk_broken_intx_masking);
 
 static void quirk_no_bus_reset(struct pci_dev *dev)
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 247da8c..bc8918e 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2262,6 +2262,20 @@
 #define PCI_DEVICE_ID_MELLANOX_ARBEL	0x6282
 #define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c
 #define PCI_DEVICE_ID_MELLANOX_SINAI	0x6274
+#define PCI_DEVICE_ID_MELLANOX_HERMON_SDR 0x6340
+#define PCI_DEVICE_ID_MELLANOX_HERMON_DDR 0x634a
+#define PCI_DEVICE_ID_MELLANOX_HERMON_QDR 0x6354
+#define PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2 0x6732
+#define PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2 0x673c
+#define PCI_DEVICE_ID_MELLANOX_HERMON_EN 0x6368
+#define PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2 0x6750
+#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN 0x6372
+#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2 0x675a
+#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2 0x6764
+#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2 0x6746
+#define PCI_DEVICE_ID_MELLANOX_CONNECTX2 0x676e
+#define PCI_DEVICE_ID_MELLANOX_CONNECTX3 0x1003
+#define PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO 0x1007
 
 #define PCI_VENDOR_ID_DFI		0x15bd
 
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] PCI: Refine broken INTx masking for Mellanox devices
  2016-04-19 16:33 [PATCH] PCI: Refine broken INTx masking for Mellanox devices Majd Dibbiny
@ 2016-04-19 19:04 ` Alex Williamson
  2016-04-21  7:12   ` Majd Dibbiny
  0 siblings, 1 reply; 13+ messages in thread
From: Alex Williamson @ 2016-04-19 19:04 UTC (permalink / raw)
  To: Majd Dibbiny; +Cc: bhelgaas, linux-pci, ogerlitz, Noa Osherovich

On Tue, 19 Apr 2016 19:33:36 +0300
Majd Dibbiny <majd@mellanox.com> wrote:

> From: Noa Osherovich <noaos@mellanox.com>
> 
> Mellanox devices were marked as having INTx masking ability broken.
> As a result, the VFIO driver fails to load when the device was
> passed-through to a VM.
> 
> This patch excludes ConnectX-4, ConnectX4-Lx and Connect-IB from the
> list of Mellanox devices marked as having broken INTx masking:
> 
> - ConnectX-4 and ConnectX4-Lx either declare INTx are not supported
>   (FW versions 12.14.2036 / 14.12.2036) or support them properly
>   (starting May 2016 firmware release). Users having earlier firmware
>   versions will have to update to either one.
> - Connect-IB does not support INTx currently so will not cause any
>   problem.

What's the user and support experience here?  A user gets a new kernel
that no longer marks INTx masking as bad (note that if the device does
not support INTx by returing 0 in the pin register, it doesn't matter
whether it's marked bad or not), they try to assign the device and get
an interrupt storm on the guest and go complain to support.  Support
tries to reproduce, maybe can, maybe can't, depending on the firmware
version they happen to have running, everyone gets very unhappy until
the thing that almost never works, "are you running the latest
firmware", actually works.  Can we instead add a new function for
mellanox that can probe the firmware version and continue marking INTx
masking bad with a warning noting that a firmware update will resolve
it?  Thanks,

Alex

> 
> Fixes: 11e42532ada31 ('PCI: Assume all Mellanox devices have ...')
> Signed-off-by: Noa Osherovich <noaos@mellanox.com>
> Signed-off-by: Majd Dibbiny <majd@mellanox.com>
> ---
>  drivers/pci/quirks.c    | 61 ++++++++++++++++++++++++++++++++++++++++++++++++-
>  include/linux/pci_ids.h | 14 ++++++++++++
>  2 files changed, 74 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
> index 8e67802..6a1856e 100644
> --- a/drivers/pci/quirks.c
> +++ b/drivers/pci/quirks.c
> @@ -3147,7 +3147,66 @@ DECLARE_PCI_FIXUP_HEADER(0x1814, 0x0601, /* Ralink RT2800 802.11n PCI */
>   */
>  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_REALTEK, 0x8169,
>  			 quirk_broken_intx_masking);
> -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_ANY_ID,
> +
> +/*
> + * Mellanox devices that fail under PCI device assignment using DisINTx masking
> + */
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_TAVOR_BRIDGE,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_ARBEL,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_SINAI,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_SINAI_OLD,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_HERMON_SDR,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_HERMON_DDR,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_HERMON_QDR,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_HERMON_EN,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX_EN,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX2,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX3,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO,
>  			 quirk_broken_intx_masking);
>  
>  static void quirk_no_bus_reset(struct pci_dev *dev)
> diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
> index 247da8c..bc8918e 100644
> --- a/include/linux/pci_ids.h
> +++ b/include/linux/pci_ids.h
> @@ -2262,6 +2262,20 @@
>  #define PCI_DEVICE_ID_MELLANOX_ARBEL	0x6282
>  #define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c
>  #define PCI_DEVICE_ID_MELLANOX_SINAI	0x6274
> +#define PCI_DEVICE_ID_MELLANOX_HERMON_SDR 0x6340
> +#define PCI_DEVICE_ID_MELLANOX_HERMON_DDR 0x634a
> +#define PCI_DEVICE_ID_MELLANOX_HERMON_QDR 0x6354
> +#define PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2 0x6732
> +#define PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2 0x673c
> +#define PCI_DEVICE_ID_MELLANOX_HERMON_EN 0x6368
> +#define PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2 0x6750
> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN 0x6372
> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2 0x675a
> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2 0x6764
> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2 0x6746
> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX2 0x676e
> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX3 0x1003
> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO 0x1007
>  
>  #define PCI_VENDOR_ID_DFI		0x15bd
>  


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] PCI: Refine broken INTx masking for Mellanox devices
  2016-04-19 19:04 ` Alex Williamson
@ 2016-04-21  7:12   ` Majd Dibbiny
  2016-04-25 12:45     ` Majd Dibbiny
  0 siblings, 1 reply; 13+ messages in thread
From: Majd Dibbiny @ 2016-04-21  7:12 UTC (permalink / raw)
  To: Alex Williamson; +Cc: bhelgaas, linux-pci, Or Gerlitz, Noa Osherovich



On 4/19/2016 10:04 PM, Alex Williamson wrote:
> On Tue, 19 Apr 2016 19:33:36 +0300
> Majd Dibbiny <majd@mellanox.com> wrote:
>
>> From: Noa Osherovich <noaos@mellanox.com>
>>
>> Mellanox devices were marked as having INTx masking ability broken.
>> As a result, the VFIO driver fails to load when the device was
>> passed-through to a VM.
>>
>> This patch excludes ConnectX-4, ConnectX4-Lx and Connect-IB from the
>> list of Mellanox devices marked as having broken INTx masking:
>>
>> - ConnectX-4 and ConnectX4-Lx either declare INTx are not supported
>>    (FW versions 12.14.2036 / 14.12.2036) or support them properly
>>    (starting May 2016 firmware release). Users having earlier firmware
>>    versions will have to update to either one.
>> - Connect-IB does not support INTx currently so will not cause any
>>    problem.
> What's the user and support experience here?  A user gets a new kernel
> that no longer marks INTx masking as bad (note that if the device does
> not support INTx by returing 0 in the pin register, it doesn't matter
> whether it's marked bad or not), they try to assign the device and get
> an interrupt storm on the guest and go complain to support.  Support
> tries to reproduce, maybe can, maybe can't, depending on the firmware
> version they happen to have running, everyone gets very unhappy until
> the thing that almost never works, "are you running the latest
> firmware", actually works.  Can we instead add a new function for
> mellanox that can probe the firmware version and continue marking INTx
> masking bad with a warning noting that a firmware update will resolve
> it?  Thanks,
>
> Alex
Hi Alex,
I understand your concern and therefore we have documented this in our 
Release notes to ease the troubleshooting experience for everyone.. We 
have also added the Firmware versions in the commit messages to make it 
clearer...

What do you think?

Thanks
>> Fixes: 11e42532ada31 ('PCI: Assume all Mellanox devices have ...')
>> Signed-off-by: Noa Osherovich <noaos@mellanox.com>
>> Signed-off-by: Majd Dibbiny <majd@mellanox.com>
>> ---
>>   drivers/pci/quirks.c    | 61 ++++++++++++++++++++++++++++++++++++++++++++++++-
>>   include/linux/pci_ids.h | 14 ++++++++++++
>>   2 files changed, 74 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
>> index 8e67802..6a1856e 100644
>> --- a/drivers/pci/quirks.c
>> +++ b/drivers/pci/quirks.c
>> @@ -3147,7 +3147,66 @@ DECLARE_PCI_FIXUP_HEADER(0x1814, 0x0601, /* Ralink RT2800 802.11n PCI */
>>    */
>>   DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_REALTEK, 0x8169,
>>   			 quirk_broken_intx_masking);
>> -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_ANY_ID,
>> +
>> +/*
>> + * Mellanox devices that fail under PCI device assignment using DisINTx masking
>> + */
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_TAVOR_BRIDGE,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_ARBEL,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_SINAI,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_SINAI_OLD,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_HERMON_SDR,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_HERMON_DDR,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_HERMON_QDR,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_HERMON_EN,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX_EN,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX2,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX3,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO,
>>   			 quirk_broken_intx_masking);
>>   
>>   static void quirk_no_bus_reset(struct pci_dev *dev)
>> diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
>> index 247da8c..bc8918e 100644
>> --- a/include/linux/pci_ids.h
>> +++ b/include/linux/pci_ids.h
>> @@ -2262,6 +2262,20 @@
>>   #define PCI_DEVICE_ID_MELLANOX_ARBEL	0x6282
>>   #define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c
>>   #define PCI_DEVICE_ID_MELLANOX_SINAI	0x6274
>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_SDR 0x6340
>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_DDR 0x634a
>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_QDR 0x6354
>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2 0x6732
>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2 0x673c
>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_EN 0x6368
>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2 0x6750
>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN 0x6372
>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2 0x675a
>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2 0x6764
>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2 0x6746
>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX2 0x676e
>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX3 0x1003
>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO 0x1007
>>   
>>   #define PCI_VENDOR_ID_DFI		0x15bd
>>   


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] PCI: Refine broken INTx masking for Mellanox devices
  2016-04-21  7:12   ` Majd Dibbiny
@ 2016-04-25 12:45     ` Majd Dibbiny
  2016-04-25 13:55       ` Alex Williamson
  0 siblings, 1 reply; 13+ messages in thread
From: Majd Dibbiny @ 2016-04-25 12:45 UTC (permalink / raw)
  To: Alex Williamson; +Cc: bhelgaas, linux-pci, Or Gerlitz, Noa Osherovich

Hi Bjorn,

Can you please merge this?

Thanks

On 4/21/2016 10:12 AM, Majd Dibbiny wrote:
>
>
> On 4/19/2016 10:04 PM, Alex Williamson wrote:
>> On Tue, 19 Apr 2016 19:33:36 +0300
>> Majd Dibbiny <majd@mellanox.com> wrote:
>>
>>> From: Noa Osherovich <noaos@mellanox.com>
>>>
>>> Mellanox devices were marked as having INTx masking ability broken.
>>> As a result, the VFIO driver fails to load when the device was
>>> passed-through to a VM.
>>>
>>> This patch excludes ConnectX-4, ConnectX4-Lx and Connect-IB from the
>>> list of Mellanox devices marked as having broken INTx masking:
>>>
>>> - ConnectX-4 and ConnectX4-Lx either declare INTx are not supported
>>>    (FW versions 12.14.2036 / 14.12.2036) or support them properly
>>>    (starting May 2016 firmware release). Users having earlier firmware
>>>    versions will have to update to either one.
>>> - Connect-IB does not support INTx currently so will not cause any
>>>    problem.
>> What's the user and support experience here?  A user gets a new kernel
>> that no longer marks INTx masking as bad (note that if the device does
>> not support INTx by returing 0 in the pin register, it doesn't matter
>> whether it's marked bad or not), they try to assign the device and get
>> an interrupt storm on the guest and go complain to support. Support
>> tries to reproduce, maybe can, maybe can't, depending on the firmware
>> version they happen to have running, everyone gets very unhappy until
>> the thing that almost never works, "are you running the latest
>> firmware", actually works.  Can we instead add a new function for
>> mellanox that can probe the firmware version and continue marking INTx
>> masking bad with a warning noting that a firmware update will resolve
>> it?  Thanks,
>>
>> Alex
> Hi Alex,
> I understand your concern and therefore we have documented this in our 
> Release notes to ease the troubleshooting experience for everyone.. We 
> have also added the Firmware versions in the commit messages to make 
> it clearer...
>
> What do you think?
>
> Thanks
>>> Fixes: 11e42532ada31 ('PCI: Assume all Mellanox devices have ...')
>>> Signed-off-by: Noa Osherovich <noaos@mellanox.com>
>>> Signed-off-by: Majd Dibbiny <majd@mellanox.com>
>>> ---
>>>   drivers/pci/quirks.c    | 61 
>>> ++++++++++++++++++++++++++++++++++++++++++++++++-
>>>   include/linux/pci_ids.h | 14 ++++++++++++
>>>   2 files changed, 74 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
>>> index 8e67802..6a1856e 100644
>>> --- a/drivers/pci/quirks.c
>>> +++ b/drivers/pci/quirks.c
>>> @@ -3147,7 +3147,66 @@ DECLARE_PCI_FIXUP_HEADER(0x1814, 0x0601, /* 
>>> Ralink RT2800 802.11n PCI */
>>>    */
>>>   DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_REALTEK, 0x8169,
>>>                quirk_broken_intx_masking);
>>> -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_ANY_ID,
>>> +
>>> +/*
>>> + * Mellanox devices that fail under PCI device assignment using 
>>> DisINTx masking
>>> + */
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 
>>> PCI_DEVICE_ID_MELLANOX_TAVOR,
>>> +             quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +             PCI_DEVICE_ID_MELLANOX_TAVOR_BRIDGE,
>>> +             quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 
>>> PCI_DEVICE_ID_MELLANOX_ARBEL,
>>> +             quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +             PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT,
>>> +             quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 
>>> PCI_DEVICE_ID_MELLANOX_SINAI,
>>> +             quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +             PCI_DEVICE_ID_MELLANOX_SINAI_OLD,
>>> +             quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +             PCI_DEVICE_ID_MELLANOX_HERMON_SDR,
>>> +             quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +             PCI_DEVICE_ID_MELLANOX_HERMON_DDR,
>>> +             quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +             PCI_DEVICE_ID_MELLANOX_HERMON_QDR,
>>> +             quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +             PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2,
>>> +             quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +             PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2,
>>> +             quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +             PCI_DEVICE_ID_MELLANOX_HERMON_EN,
>>> +             quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +             PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2,
>>> +             quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +             PCI_DEVICE_ID_MELLANOX_CONNECTX_EN,
>>> +             quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +             PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2,
>>> +             quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +             PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2,
>>> +             quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +             PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2,
>>> +             quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +             PCI_DEVICE_ID_MELLANOX_CONNECTX2,
>>> +             quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +             PCI_DEVICE_ID_MELLANOX_CONNECTX3,
>>> +             quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +             PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO,
>>>                quirk_broken_intx_masking);
>>>     static void quirk_no_bus_reset(struct pci_dev *dev)
>>> diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
>>> index 247da8c..bc8918e 100644
>>> --- a/include/linux/pci_ids.h
>>> +++ b/include/linux/pci_ids.h
>>> @@ -2262,6 +2262,20 @@
>>>   #define PCI_DEVICE_ID_MELLANOX_ARBEL    0x6282
>>>   #define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c
>>>   #define PCI_DEVICE_ID_MELLANOX_SINAI    0x6274
>>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_SDR 0x6340
>>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_DDR 0x634a
>>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_QDR 0x6354
>>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2 0x6732
>>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2 0x673c
>>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_EN 0x6368
>>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2 0x6750
>>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN 0x6372
>>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2 0x675a
>>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2 0x6764
>>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2 0x6746
>>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX2 0x676e
>>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX3 0x1003
>>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO 0x1007
>>>     #define PCI_VENDOR_ID_DFI        0x15bd
>


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] PCI: Refine broken INTx masking for Mellanox devices
  2016-04-25 12:45     ` Majd Dibbiny
@ 2016-04-25 13:55       ` Alex Williamson
       [not found]         ` <5720CFA1.6030200@mellanox.com>
  0 siblings, 1 reply; 13+ messages in thread
From: Alex Williamson @ 2016-04-25 13:55 UTC (permalink / raw)
  To: Majd Dibbiny; +Cc: bhelgaas, linux-pci, Or Gerlitz, Noa Osherovich

On Mon, 25 Apr 2016 15:45:37 +0300
Majd Dibbiny <majd@mellanox.com> wrote:

> Hi Bjorn,
> 
> Can you please merge this?
> 
> Thanks
> 
> On 4/21/2016 10:12 AM, Majd Dibbiny wrote:
> >
> >
> > On 4/19/2016 10:04 PM, Alex Williamson wrote:  
> >> On Tue, 19 Apr 2016 19:33:36 +0300
> >> Majd Dibbiny <majd@mellanox.com> wrote:
> >>  
> >>> From: Noa Osherovich <noaos@mellanox.com>
> >>>
> >>> Mellanox devices were marked as having INTx masking ability broken.
> >>> As a result, the VFIO driver fails to load when the device was
> >>> passed-through to a VM.
> >>>
> >>> This patch excludes ConnectX-4, ConnectX4-Lx and Connect-IB from the
> >>> list of Mellanox devices marked as having broken INTx masking:
> >>>
> >>> - ConnectX-4 and ConnectX4-Lx either declare INTx are not supported
> >>>    (FW versions 12.14.2036 / 14.12.2036) or support them properly
> >>>    (starting May 2016 firmware release). Users having earlier firmware
> >>>    versions will have to update to either one.
> >>> - Connect-IB does not support INTx currently so will not cause any
> >>>    problem.  
> >> What's the user and support experience here?  A user gets a new kernel
> >> that no longer marks INTx masking as bad (note that if the device does
> >> not support INTx by returing 0 in the pin register, it doesn't matter
> >> whether it's marked bad or not), they try to assign the device and get
> >> an interrupt storm on the guest and go complain to support. Support
> >> tries to reproduce, maybe can, maybe can't, depending on the firmware
> >> version they happen to have running, everyone gets very unhappy until
> >> the thing that almost never works, "are you running the latest
> >> firmware", actually works.  Can we instead add a new function for
> >> mellanox that can probe the firmware version and continue marking INTx
> >> masking bad with a warning noting that a firmware update will resolve
> >> it?  Thanks,
> >>
> >> Alex  
> > Hi Alex,
> > I understand your concern and therefore we have documented this in our 
> > Release notes to ease the troubleshooting experience for everyone.. We 
> > have also added the Firmware versions in the commit messages to make 
> > it clearer...
> >
> > What do you think?

My comment is trying to highlight the problem that users are not likely
to check for updated firmware for every device *before* installing a new
kernel, so there's a reasonable chance that a user will see this change
first as a regression in the kernel.  So documentation in the firmware
release notes is really only going to help very meticulous users.  For
everyone else it still looks like a kernel regression and a detailed
commit log is an after the fact aid to resolving the problem.  So, is
there some way we can detect the firmware from the quirk and make a
determination whether to enable broken intx, with a dmesg entry
indicating to update device firmware?  Thanks,

Alex

> >>> Fixes: 11e42532ada31 ('PCI: Assume all Mellanox devices have ...')
> >>> Signed-off-by: Noa Osherovich <noaos@mellanox.com>
> >>> Signed-off-by: Majd Dibbiny <majd@mellanox.com>
> >>> ---
> >>>   drivers/pci/quirks.c    | 61 
> >>> ++++++++++++++++++++++++++++++++++++++++++++++++-
> >>>   include/linux/pci_ids.h | 14 ++++++++++++
> >>>   2 files changed, 74 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
> >>> index 8e67802..6a1856e 100644
> >>> --- a/drivers/pci/quirks.c
> >>> +++ b/drivers/pci/quirks.c
> >>> @@ -3147,7 +3147,66 @@ DECLARE_PCI_FIXUP_HEADER(0x1814, 0x0601, /* 
> >>> Ralink RT2800 802.11n PCI */
> >>>    */
> >>>   DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_REALTEK, 0x8169,
> >>>                quirk_broken_intx_masking);
> >>> -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_ANY_ID,
> >>> +
> >>> +/*
> >>> + * Mellanox devices that fail under PCI device assignment using 
> >>> DisINTx masking
> >>> + */
> >>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 
> >>> PCI_DEVICE_ID_MELLANOX_TAVOR,
> >>> +             quirk_broken_intx_masking);
> >>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> >>> +             PCI_DEVICE_ID_MELLANOX_TAVOR_BRIDGE,
> >>> +             quirk_broken_intx_masking);
> >>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 
> >>> PCI_DEVICE_ID_MELLANOX_ARBEL,
> >>> +             quirk_broken_intx_masking);
> >>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> >>> +             PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT,
> >>> +             quirk_broken_intx_masking);
> >>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 
> >>> PCI_DEVICE_ID_MELLANOX_SINAI,
> >>> +             quirk_broken_intx_masking);
> >>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> >>> +             PCI_DEVICE_ID_MELLANOX_SINAI_OLD,
> >>> +             quirk_broken_intx_masking);
> >>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> >>> +             PCI_DEVICE_ID_MELLANOX_HERMON_SDR,
> >>> +             quirk_broken_intx_masking);
> >>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> >>> +             PCI_DEVICE_ID_MELLANOX_HERMON_DDR,
> >>> +             quirk_broken_intx_masking);
> >>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> >>> +             PCI_DEVICE_ID_MELLANOX_HERMON_QDR,
> >>> +             quirk_broken_intx_masking);
> >>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> >>> +             PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2,
> >>> +             quirk_broken_intx_masking);
> >>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> >>> +             PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2,
> >>> +             quirk_broken_intx_masking);
> >>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> >>> +             PCI_DEVICE_ID_MELLANOX_HERMON_EN,
> >>> +             quirk_broken_intx_masking);
> >>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> >>> +             PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2,
> >>> +             quirk_broken_intx_masking);
> >>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> >>> +             PCI_DEVICE_ID_MELLANOX_CONNECTX_EN,
> >>> +             quirk_broken_intx_masking);
> >>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> >>> +             PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2,
> >>> +             quirk_broken_intx_masking);
> >>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> >>> +             PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2,
> >>> +             quirk_broken_intx_masking);
> >>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> >>> +             PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2,
> >>> +             quirk_broken_intx_masking);
> >>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> >>> +             PCI_DEVICE_ID_MELLANOX_CONNECTX2,
> >>> +             quirk_broken_intx_masking);
> >>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> >>> +             PCI_DEVICE_ID_MELLANOX_CONNECTX3,
> >>> +             quirk_broken_intx_masking);
> >>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> >>> +             PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO,
> >>>                quirk_broken_intx_masking);
> >>>     static void quirk_no_bus_reset(struct pci_dev *dev)
> >>> diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
> >>> index 247da8c..bc8918e 100644
> >>> --- a/include/linux/pci_ids.h
> >>> +++ b/include/linux/pci_ids.h
> >>> @@ -2262,6 +2262,20 @@
> >>>   #define PCI_DEVICE_ID_MELLANOX_ARBEL    0x6282
> >>>   #define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c
> >>>   #define PCI_DEVICE_ID_MELLANOX_SINAI    0x6274
> >>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_SDR 0x6340
> >>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_DDR 0x634a
> >>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_QDR 0x6354
> >>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2 0x6732
> >>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2 0x673c
> >>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_EN 0x6368
> >>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2 0x6750
> >>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN 0x6372
> >>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2 0x675a
> >>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2 0x6764
> >>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2 0x6746
> >>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX2 0x676e
> >>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX3 0x1003
> >>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO 0x1007
> >>>     #define PCI_VENDOR_ID_DFI        0x15bd  
> >  
> 


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] PCI: Refine broken INTx masking for Mellanox devices
       [not found]         ` <5720CFA1.6030200@mellanox.com>
@ 2016-04-28  7:55           ` Majd Dibbiny
  0 siblings, 0 replies; 13+ messages in thread
From: Majd Dibbiny @ 2016-04-28  7:55 UTC (permalink / raw)
  To: Or Gerlitz, Alex Williamson; +Cc: bhelgaas, linux-pci, Noa Osherovich



On 4/27/2016 5:41 PM, Or Gerlitz wrote:
> On 4/25/2016 4:55 PM, Alex Williamson wrote:
>> My comment is trying to highlight the problem that users are not likely
>> to check for updated firmware for every device *before* installing a new
>> kernel, so there's a reasonable chance that a user will see this change
>> first as a regression in the kernel.  So documentation in the firmware
>> release notes is really only going to help very meticulous users.  For
>> everyone else it still looks like a kernel regression and a detailed
>> commit log is an after the fact aid to resolving the problem.  So, is
>> there some way we can detect the firmware from the quirk and make a
>> determination whether to enable broken intx, with a dmesg entry
>> indicating to update device firmware?  Thanks,
>
Yes, we can read the firmware version in the quirk. I'll prepare a new 
fix that determines whether to mark INTx masking as broken or not 
according to the firmware version.
> Yep, makes sense to me.
>
> If we can do that, lets go this way, Majd?
>
> Or.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] PCI: Refine broken INTx masking for Mellanox devices
  2016-11-10 15:56     ` Noa Osherovich
@ 2016-11-10 23:22       ` Gavin Shan
  0 siblings, 0 replies; 13+ messages in thread
From: Gavin Shan @ 2016-11-10 23:22 UTC (permalink / raw)
  To: Noa Osherovich; +Cc: gwshan, helgaas, linux-pci, majd, alex.williamson, amir

On Thu, Nov 10, 2016 at 05:56:50PM +0200, Noa Osherovich wrote:

.../...

>>>> As a result, the VFIO driver fails to start when more than one device
>>>> function is passed-through to a VM if both have the same INTx pin.
>>>>
>> I don't see how the issue is related to the patch. INTx pin can be shared
>> by multiple functions and it's not going to be changed by the patch. When
>> the functions aren't claimed for capability to mask INTx from device side,
>> the shared INTx (or hardware IRQ) has to be masked from host (IRQ controller)
>> side. It does affect multiple VMs who are sharing thise INTx pin. I'm not
>> sure if it's the problem you have?
>
>VFIO reads the broken_intx_masking field when it's used on the host (VFIO uses a
>variable named pci_2_3). During vfio_intx_set_signal it sets the irqflags
>variable to 0 if the pci_2_3 is 0. When the first function is assigned to the
>guest, everything is OK.
>When a second function with the same INT-x pin is assigned, VFIO checks that they
>can share the IRQ.
>The irqflags variable is then checked to see if IRQF_SHARED is set. If not, which
>is the case here, VFIO fails to request IRQ thus failing and the guest can't start.
>

Noa, Thanks for the explanation. It makes sense.

Thanks,
Gavin

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] PCI: Refine broken INTx masking for Mellanox devices
  2016-11-10 15:00     ` Bjorn Helgaas
@ 2016-11-10 15:58       ` Noa Osherovich
  0 siblings, 0 replies; 13+ messages in thread
From: Noa Osherovich @ 2016-11-10 15:58 UTC (permalink / raw)
  To: Bjorn Helgaas, Gavin Shan
  Cc: bhelgaas, linux-pci, Amir Vadai, Majd Dibbiny, Alex Williamson, amir

Hi Bjorn,

On 11/10/2016 5:00 PM, Bjorn Helgaas wrote:

> On Thu, Nov 10, 2016 at 02:46:40PM +1100, Gavin Shan wrote:
>> On Wed, Nov 09, 2016 at 12:22:32PM -0600, Bjorn Helgaas wrote:
>>> [+cc Gavin, Amir, Majd, Alex]
>>>
>>> Hi Noa,
>>>
>>> On Tue, Nov 01, 2016 at 04:47:24PM +0200, Noa Osherovich wrote:
>>>> Mellanox devices were marked as having INTx masking ability broken.
>>> This was done by 11e42532ada3 ("PCI: Assume all Mellanox devices have
>>> broken INTx masking"), by Gavin Shan.  Possibly that was a little too
>>> aggressive.  I cc'd Gavin and Amir so they can comment.
>>>
>> I cannot know more HW details than Mellanox guys. So I assume the logic
>> in the code changes are correct. I totally agree with Bjron suggested
>> except one minor point: There will have two quirks for HEADER and FINAL
>> fixup separately as Bjorn suggested. One entry for one combination of
>> vendor/device will be put into section .pci_fixup_{header | final}.
>> The vendor/device IDs in the entries are compared with every PCI deivce
>> and the callbacks are invoked if they match. When number of entries in
>> section .pci_fixup_{header | final} increases, more time used to iterate
>> the section in pci_do_fixups(). The time will be saved if we can just
>> have one FINAL quirk as below, which probably makes code the easier to
>> be maintained.
>>
>> static void mellanox_check_broken_intx_masking(struct pci_dev *pdev)
>> {
>> 	if (pdev->device in broken_device_list)
>> 		quirk_broken_intx_masking(pdev);
>>         else if (pdev->dev is CONNECT4)
>>                 /* Check the firmware version to call quirk_broken_intx_masking() */
>> }
>>
>> DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_MELLANOX, PCI_ANY_ID, PCI_ANY_ID,
>> 			mellanox_check_broken_intx_masking);
>>
>> Above comment bases on the fact: pdev->broken_intx_masking is used by
>> device driver only. We still need anohter one HEADER quirk for Mellanox
>> (not one for vendor/device ID combination) if we want to see finalized
>> pdev->broken_intx_masking before that point.
> So you're proposing to make the quirk applicable to *all* Mellanox
> devices, then check against a list inside the quirk.  You still have
> to maintain a table of the broken devices.  I'm OK with that, and if
> you do it that way, it probably does make sense to combine them as you
> suggest.
>
> I agree that it would be simpler if these were all FINAL quirks.  I
> don't think *any* of the quirk_broken_intx_masking() quirks need to be
> HEADER quirks.  All of them could be FINAL quirks instead.
>
> I do *not* want a mix of HEADER & FINAL quirks that set
> broken_intx_masking.  If we have a mix, it's not clear when it's
> really needed and it's not clear how to decide which to use.
>
> Now we're up to three patches:
>
>   1) Convert all quirk_broken_intx_masking() quirks (not just the
>      Mellanox ones) from HEADER to FINAL.
>
>   2) Convert the Mellanox quirks from "all Mellanox devices" to the
>      listed ones only, something like this:
>
>       static u16 mellanox_broken_intx[] = { ... };
>
>       static void mellanox_broken_intx_quirk(struct pci_dev *dev)
>       {
> 	int i;
>
> 	for (i = 0; i < ARRAY_SIZE(mellanox_broken_intx); i++) {
> 	  if (dev->device == mellanox_broken_intx[i]) {
> 	    dev->broken_intx_masking = 1;
> 	    return;
> 	  }
> 	}
>       }
>       DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_MELLANOX, PCI_ANY_ID, PCI_ANY_ID,
>         mellanox_broken_intx_quirk);
>
>   3) Add the CONNECT4-specific firmware version checks to
>      mellanox_broken_intx_quirk().

Ack.

>>>> diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
>>>> index c58752fe16c4..1c742842e76c 100644
>>>> --- a/include/linux/pci_ids.h
>>>> +++ b/include/linux/pci_ids.h
>>>> @@ -2262,6 +2262,22 @@
>>>>  #define PCI_DEVICE_ID_MELLANOX_ARBEL	0x6282
>>>>  #define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c
>>>>  #define PCI_DEVICE_ID_MELLANOX_SINAI	0x6274
>>>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_SDR 0x6340
>>>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_DDR 0x634a
>>>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_QDR 0x6354
>>>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2 0x6732
>>>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2 0x673c
>>>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_EN 0x6368
>>>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2 0x6750
>>>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN 0x6372
>>>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2 0x675a
>>>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2 0x6764
>>>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2 0x6746
>>>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX2 0x676e
>>>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX3 0x1003
>>>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO 0x1007
>>>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX4 0x1013
>>>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX 0x1015
> I forgot to mention: we normally don't add definitions to pci_ids.h
> unless they're used in more than one place (there's a comment about
> this at the top of pci_ids.h).  So these device IDs should all just be
> listed directly in the quirks or in the static table of broken
> devices.

Ack again. I'll send the patches early next week.

> Bjorn


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] PCI: Refine broken INTx masking for Mellanox devices
  2016-11-10  3:46   ` Gavin Shan
  2016-11-10 15:00     ` Bjorn Helgaas
@ 2016-11-10 15:56     ` Noa Osherovich
  2016-11-10 23:22       ` Gavin Shan
  1 sibling, 1 reply; 13+ messages in thread
From: Noa Osherovich @ 2016-11-10 15:56 UTC (permalink / raw)
  To: gwshan; +Cc: helgaas, noaos, linux-pci, majd, alex.williamson, amir

Hi Gavin,

On 11/10/2016 5:46 AM, Gavin Shan wrote:

> On Wed, Nov 09, 2016 at 12:22:32PM -0600, Bjorn Helgaas wrote:
>> [+cc Gavin, Amir, Majd, Alex]
>>
>> Hi Noa,
>>
>> On Tue, Nov 01, 2016 at 04:47:24PM +0200, Noa Osherovich wrote:
>>> Mellanox devices were marked as having INTx masking ability broken.
>> This was done by 11e42532ada3 ("PCI: Assume all Mellanox devices have
>> broken INTx masking"), by Gavin Shan.  Possibly that was a little too
>> aggressive.  I cc'd Gavin and Amir so they can comment.
>>
> I cannot know more HW details than Mellanox guys. So I assume the logic
> in the code changes are correct. I totally agree with Bjron suggested
> except one minor point: There will have two quirks for HEADER and FINAL
> fixup separately as Bjorn suggested. One entry for one combination of
> vendor/device will be put into section .pci_fixup_{header | final}.
> The vendor/device IDs in the entries are compared with every PCI deivce
> and the callbacks are invoked if they match. When number of entries in
> section .pci_fixup_{header | final} increases, more time used to iterate
> the section in pci_do_fixups(). The time will be saved if we can just
> have one FINAL quirk as below, which probably makes code the easier to
> be maintained.
>
> static void mellanox_check_broken_intx_masking(struct pci_dev *pdev)
> {
> 	if (pdev->device in broken_device_list)
> 		quirk_broken_intx_masking(pdev);
>         else if (pdev->dev is CONNECT4)
>                 /* Check the firmware version to call quirk_broken_intx_masking() */
> }
>
> DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_MELLANOX, PCI_ANY_ID, PCI_ANY_ID,
> 			mellanox_check_broken_intx_masking);
>
> Above comment bases on the fact: pdev->broken_intx_masking is used by
> device driver only. We still need anohter one HEADER quirk for Mellanox
> (not one for vendor/device ID combination) if we want to see finalized
> pdev->broken_intx_masking before that point.
>
>> This patch is really doing two separate things:
>>
>>  1) Changing from a blanket "all Mellanox devices" have broken INTx to a
>>  specific "only these listed Mellanox devices" have broken INTx, and
>>
>>  2) Checking the firmware version of PCI_DEVICE_ID_MELLANOX_CONNECTX4 and
>>  PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX to see if INTx masking works.
>>
>> I'd like these to be split into two patches: one that sets
>> broken_intx_masking for the list of Mellanox devices (including CONNECTX4),
>> and a second that adds quirk_connectx_4_verify_fw() and calls it instead of
>> quirk_broken_intx_masking() for the CONNECTX4 devices.
>>
>> That way, a hypothetical problem with a new Mellanox device that's not
>> on the list will bisect to the first patch, where the solution is
>> obvious, rather than to the combined patch, which would be much less
>> obvious.
>>
>>> As a result, the VFIO driver fails to start when more than one device
>>> function is passed-through to a VM if both have the same INTx pin.
>>>
> I don't see how the issue is related to the patch. INTx pin can be shared
> by multiple functions and it's not going to be changed by the patch. When
> the functions aren't claimed for capability to mask INTx from device side,
> the shared INTx (or hardware IRQ) has to be masked from host (IRQ controller)
> side. It does affect multiple VMs who are sharing thise INTx pin. I'm not
> sure if it's the problem you have?

VFIO reads the broken_intx_masking field when it's used on the host (VFIO uses a
variable named pci_2_3). During vfio_intx_set_signal it sets the irqflags
variable to 0 if the pci_2_3 is 0. When the first function is assigned to the
guest, everything is OK.
When a second function with the same INT-x pin is assigned, VFIO checks that they
can share the IRQ.
The irqflags variable is then checked to see if IRQF_SHARED is set. If not, which
is the case here, VFIO fails to request IRQ thus failing and the guest can't start.

>>> Prior to Connect-IB, Mellanox devices exposed to the operating system
>>> one PCI function per all ports.
>>> Starting from Connect-IB, the devices are function-per-port. When
>>> passing the second function to a VM, VFIO will fail to start.
>>>
>>> Exclude ConnectX-4, ConnectX4-Lx and Connect-IB from the list of
>>> Mellanox devices marked as having broken INTx masking:
>>>
>>> - ConnectX-4 and ConnectX4-LX firmware version is checked. If INTx
>>>   masking is supported, we unmark the broken INTx masking.
>>> - Connect-IB does not support INTx currently so will not cause any
>>>   problem.
>>>
>>> Fixes: 11e42532ada31 ('PCI: Assume all Mellanox devices have ...')
>>> Signed-off-by: Noa Osherovich <noaos@mellanox.com>
>>> Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
>>> --
>>> Previous patch version can be found here:
>>> https://patchwork.ozlabs.org/patch/612222/
>>> ---
>>>  drivers/pci/quirks.c    | 111 +++++++++++++++++++++++++++++++++++++++++++++++-
>>>  include/linux/pci_ids.h |  16 +++++++
>>>  2 files changed, 126 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
>>> index c232729f5b1b..9e6d6aafc2ab 100644
>>> --- a/drivers/pci/quirks.c
>>> +++ b/drivers/pci/quirks.c
>>> @@ -3158,8 +3158,117 @@ static void quirk_broken_intx_masking(struct pci_dev *dev)
>>>   */
>>>  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_REALTEK, 0x8169,
>>>  			 quirk_broken_intx_masking);
>>> -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_ANY_ID,
>>> +
>>> +#define CONNECTX_4_CURR_MAX_MINOR 99
>>> +#define CONNECTX_4_INTX_SUPPORT_MINOR 14
>>> +
>>> +/*
>>> + * Checking ConnectX-4/LX FW version to see if it supports legacy interrupts.
>>> + * If so, don't mark it as broken.
>>> + * FW minor > 99 means older FW version format and no INTx masking support.
>>> + * FW minor < 14 means new FW version format and no INTx masking support.
>>> + */
>>> +static void quirk_connectx_4_verify_fw(struct pci_dev *dev)
>>> +{
>>> +	u16 pmcsr;
>>> +	u8 __iomem *fw_ver;
>>> +	u8 fw_minor;
>>> +
>>> +	dev->broken_intx_masking = 1;
>>> +
>>> +	/*
>>> +	 * Check that the device is in the D0 power state. If it's not,
>>> +	 * there is no point to look any further.
>>> +	 */
>>> +	if (dev->pm_cap) {
>>> +		pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
>>> +		if ((pmcsr & PCI_PM_CTRL_STATE_MASK) != PCI_D0)
>>> +			return;
>>> +	}
>>> +
>>> +	/* Convert from PCI bus to resource space.  */
>>> +	fw_ver = ioremap(pci_resource_start(dev, 0), 2);
>> This is a little problematic.  This is run as a HEADER quirk, and the
>> ioremap() depends on the BAR being initialized.  On x86, the BAR
>> likely has been initialized by the BIOS, but that's not true on all
>> arches, so we can't rely on it.
>>
>> I think you probably should make this a FINAL quirk and call
>> pci_enable_device_mem() first, then use pci_iomap().  That way all the
>> BARs will have been assigned, and it also means you don't need to
>> worry about state D0 above, because pci_enable_device_mem() will make
>> sure that the device is in D0.
>>
>>> +	if (!fw_ver) {
>>> +		dev_warn(&dev->dev, "Can't map ConnectX-4 initialization segment\n");
>>> +		return;
>>> +	}
>>> +
>>> +	fw_minor = readb(fw_ver + 1);
>>> +	if (fw_minor > CONNECTX_4_CURR_MAX_MINOR ||
>>> +	    fw_minor < CONNECTX_4_INTX_SUPPORT_MINOR)
>>> +		dev_warn(&dev->dev, "ConnectX-4: FW doesn't support INTx masking, disabling. Please upgrade FW for INTx support\n");
>> Can you print the current firmware version and maybe the required one
>> here?  I think that might help the user figure out how to fix the
>> problem.
>>
>>> +	else
>>> +		dev->broken_intx_masking = 0;
>>> +
>>> +	iounmap(fw_ver);
>>> +}
>>> +
>>> +/*
>>> + * Mellanox devices that fail under PCI device assignment using DisINTx masking
>>> + */
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR,
>>> +			 quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +			 PCI_DEVICE_ID_MELLANOX_TAVOR_BRIDGE,
>>> +			 quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_ARBEL,
>>> +			 quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +			 PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT,
>>> +			 quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_SINAI,
>>> +			 quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +			 PCI_DEVICE_ID_MELLANOX_SINAI_OLD,
>>> +			 quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +			 PCI_DEVICE_ID_MELLANOX_HERMON_SDR,
>>> +			 quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +			 PCI_DEVICE_ID_MELLANOX_HERMON_DDR,
>>> +			 quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +			 PCI_DEVICE_ID_MELLANOX_HERMON_QDR,
>>> +			 quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +			 PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2,
>>> +			 quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +			 PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2,
>>> +			 quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +			 PCI_DEVICE_ID_MELLANOX_HERMON_EN,
>>> +			 quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +			 PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2,
>>> +			 quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX_EN,
>>> +			 quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2,
>>> +			 quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2,
>>> +			 quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2,
>>> +			 quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX2,
>>> +			 quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX3,
>>> +			 quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO,
>>>  			 quirk_broken_intx_masking);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX4,
>>> +			 quirk_connectx_4_verify_fw);
>>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>>> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX,
>>> +			 quirk_connectx_4_verify_fw);
>>>  
>>>  /*
>>>   * Intel i40e (XL710/X710) 10/20/40GbE NICs all have broken INTx masking,
>>> diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
>>> index c58752fe16c4..1c742842e76c 100644
>>> --- a/include/linux/pci_ids.h
>>> +++ b/include/linux/pci_ids.h
>>> @@ -2262,6 +2262,22 @@
>>>  #define PCI_DEVICE_ID_MELLANOX_ARBEL	0x6282
>>>  #define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c
>>>  #define PCI_DEVICE_ID_MELLANOX_SINAI	0x6274
>>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_SDR 0x6340
>>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_DDR 0x634a
>>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_QDR 0x6354
>>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2 0x6732
>>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2 0x673c
>>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_EN 0x6368
>>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2 0x6750
>>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN 0x6372
>>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2 0x675a
>>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2 0x6764
>>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2 0x6746
>>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX2 0x676e
>>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX3 0x1003
>>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO 0x1007
>>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX4 0x1013
>>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX 0x1015
>>>  
>>>  #define PCI_VENDOR_ID_DFI		0x15bd
>>>  
>>> -- 
>>> 1.8.3.1
>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] PCI: Refine broken INTx masking for Mellanox devices
  2016-11-10  3:46   ` Gavin Shan
@ 2016-11-10 15:00     ` Bjorn Helgaas
  2016-11-10 15:58       ` Noa Osherovich
  2016-11-10 15:56     ` Noa Osherovich
  1 sibling, 1 reply; 13+ messages in thread
From: Bjorn Helgaas @ 2016-11-10 15:00 UTC (permalink / raw)
  To: Gavin Shan
  Cc: Noa Osherovich, bhelgaas, linux-pci, Amir Vadai, Majd Dibbiny,
	Alex Williamson

On Thu, Nov 10, 2016 at 02:46:40PM +1100, Gavin Shan wrote:
> On Wed, Nov 09, 2016 at 12:22:32PM -0600, Bjorn Helgaas wrote:
> >[+cc Gavin, Amir, Majd, Alex]
> >
> >Hi Noa,
> >
> >On Tue, Nov 01, 2016 at 04:47:24PM +0200, Noa Osherovich wrote:
> >> Mellanox devices were marked as having INTx masking ability broken.
> >
> >This was done by 11e42532ada3 ("PCI: Assume all Mellanox devices have
> >broken INTx masking"), by Gavin Shan.  Possibly that was a little too
> >aggressive.  I cc'd Gavin and Amir so they can comment.
> >
> 
> I cannot know more HW details than Mellanox guys. So I assume the logic
> in the code changes are correct. I totally agree with Bjron suggested
> except one minor point: There will have two quirks for HEADER and FINAL
> fixup separately as Bjorn suggested. One entry for one combination of
> vendor/device will be put into section .pci_fixup_{header | final}.
> The vendor/device IDs in the entries are compared with every PCI deivce
> and the callbacks are invoked if they match. When number of entries in
> section .pci_fixup_{header | final} increases, more time used to iterate
> the section in pci_do_fixups(). The time will be saved if we can just
> have one FINAL quirk as below, which probably makes code the easier to
> be maintained.
> 
> static void mellanox_check_broken_intx_masking(struct pci_dev *pdev)
> {
> 	if (pdev->device in broken_device_list)
> 		quirk_broken_intx_masking(pdev);
>         else if (pdev->dev is CONNECT4)
>                 /* Check the firmware version to call quirk_broken_intx_masking() */
> }
> 
> DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_MELLANOX, PCI_ANY_ID, PCI_ANY_ID,
> 			mellanox_check_broken_intx_masking);
> 
> Above comment bases on the fact: pdev->broken_intx_masking is used by
> device driver only. We still need anohter one HEADER quirk for Mellanox
> (not one for vendor/device ID combination) if we want to see finalized
> pdev->broken_intx_masking before that point.

So you're proposing to make the quirk applicable to *all* Mellanox
devices, then check against a list inside the quirk.  You still have
to maintain a table of the broken devices.  I'm OK with that, and if
you do it that way, it probably does make sense to combine them as you
suggest.

I agree that it would be simpler if these were all FINAL quirks.  I
don't think *any* of the quirk_broken_intx_masking() quirks need to be
HEADER quirks.  All of them could be FINAL quirks instead.

I do *not* want a mix of HEADER & FINAL quirks that set
broken_intx_masking.  If we have a mix, it's not clear when it's
really needed and it's not clear how to decide which to use.

Now we're up to three patches:

  1) Convert all quirk_broken_intx_masking() quirks (not just the
     Mellanox ones) from HEADER to FINAL.

  2) Convert the Mellanox quirks from "all Mellanox devices" to the
     listed ones only, something like this:

      static u16 mellanox_broken_intx[] = { ... };

      static void mellanox_broken_intx_quirk(struct pci_dev *dev)
      {
	int i;

	for (i = 0; i < ARRAY_SIZE(mellanox_broken_intx); i++) {
	  if (dev->device == mellanox_broken_intx[i]) {
	    dev->broken_intx_masking = 1;
	    return;
	  }
	}
      }
      DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_MELLANOX, PCI_ANY_ID, PCI_ANY_ID,
        mellanox_broken_intx_quirk);

  3) Add the CONNECT4-specific firmware version checks to
     mellanox_broken_intx_quirk().

> >> diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
> >> index c58752fe16c4..1c742842e76c 100644
> >> --- a/include/linux/pci_ids.h
> >> +++ b/include/linux/pci_ids.h
> >> @@ -2262,6 +2262,22 @@
> >>  #define PCI_DEVICE_ID_MELLANOX_ARBEL	0x6282
> >>  #define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c
> >>  #define PCI_DEVICE_ID_MELLANOX_SINAI	0x6274
> >> +#define PCI_DEVICE_ID_MELLANOX_HERMON_SDR 0x6340
> >> +#define PCI_DEVICE_ID_MELLANOX_HERMON_DDR 0x634a
> >> +#define PCI_DEVICE_ID_MELLANOX_HERMON_QDR 0x6354
> >> +#define PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2 0x6732
> >> +#define PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2 0x673c
> >> +#define PCI_DEVICE_ID_MELLANOX_HERMON_EN 0x6368
> >> +#define PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2 0x6750
> >> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN 0x6372
> >> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2 0x675a
> >> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2 0x6764
> >> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2 0x6746
> >> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX2 0x676e
> >> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX3 0x1003
> >> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO 0x1007
> >> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX4 0x1013
> >> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX 0x1015

I forgot to mention: we normally don't add definitions to pci_ids.h
unless they're used in more than one place (there's a comment about
this at the top of pci_ids.h).  So these device IDs should all just be
listed directly in the quirks or in the static table of broken
devices.

Bjorn

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] PCI: Refine broken INTx masking for Mellanox devices
  2016-11-09 18:22 ` Bjorn Helgaas
@ 2016-11-10  3:46   ` Gavin Shan
  2016-11-10 15:00     ` Bjorn Helgaas
  2016-11-10 15:56     ` Noa Osherovich
  0 siblings, 2 replies; 13+ messages in thread
From: Gavin Shan @ 2016-11-10  3:46 UTC (permalink / raw)
  To: Bjorn Helgaas
  Cc: Noa Osherovich, bhelgaas, linux-pci, Gavin Shan, Amir Vadai,
	Majd Dibbiny, Alex Williamson

On Wed, Nov 09, 2016 at 12:22:32PM -0600, Bjorn Helgaas wrote:
>[+cc Gavin, Amir, Majd, Alex]
>
>Hi Noa,
>
>On Tue, Nov 01, 2016 at 04:47:24PM +0200, Noa Osherovich wrote:
>> Mellanox devices were marked as having INTx masking ability broken.
>
>This was done by 11e42532ada3 ("PCI: Assume all Mellanox devices have
>broken INTx masking"), by Gavin Shan.  Possibly that was a little too
>aggressive.  I cc'd Gavin and Amir so they can comment.
>

I cannot know more HW details than Mellanox guys. So I assume the logic
in the code changes are correct. I totally agree with Bjron suggested
except one minor point: There will have two quirks for HEADER and FINAL
fixup separately as Bjorn suggested. One entry for one combination of
vendor/device will be put into section .pci_fixup_{header | final}.
The vendor/device IDs in the entries are compared with every PCI deivce
and the callbacks are invoked if they match. When number of entries in
section .pci_fixup_{header | final} increases, more time used to iterate
the section in pci_do_fixups(). The time will be saved if we can just
have one FINAL quirk as below, which probably makes code the easier to
be maintained.

static void mellanox_check_broken_intx_masking(struct pci_dev *pdev)
{
	if (pdev->device in broken_device_list)
		quirk_broken_intx_masking(pdev);
        else if (pdev->dev is CONNECT4)
                /* Check the firmware version to call quirk_broken_intx_masking() */
}

DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_MELLANOX, PCI_ANY_ID, PCI_ANY_ID,
			mellanox_check_broken_intx_masking);

Above comment bases on the fact: pdev->broken_intx_masking is used by
device driver only. We still need anohter one HEADER quirk for Mellanox
(not one for vendor/device ID combination) if we want to see finalized
pdev->broken_intx_masking before that point.

>This patch is really doing two separate things:
>
>  1) Changing from a blanket "all Mellanox devices" have broken INTx to a
>  specific "only these listed Mellanox devices" have broken INTx, and
>
>  2) Checking the firmware version of PCI_DEVICE_ID_MELLANOX_CONNECTX4 and
>  PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX to see if INTx masking works.
>
>I'd like these to be split into two patches: one that sets
>broken_intx_masking for the list of Mellanox devices (including CONNECTX4),
>and a second that adds quirk_connectx_4_verify_fw() and calls it instead of
>quirk_broken_intx_masking() for the CONNECTX4 devices.
>
>That way, a hypothetical problem with a new Mellanox device that's not
>on the list will bisect to the first patch, where the solution is
>obvious, rather than to the combined patch, which would be much less
>obvious.
>
>> As a result, the VFIO driver fails to start when more than one device
>> function is passed-through to a VM if both have the same INTx pin.
>> 

I don't see how the issue is related to the patch. INTx pin can be shared
by multiple functions and it's not going to be changed by the patch. When
the functions aren't claimed for capability to mask INTx from device side,
the shared INTx (or hardware IRQ) has to be masked from host (IRQ controller)
side. It does affect multiple VMs who are sharing thise INTx pin. I'm not
sure if it's the problem you have?

>> Prior to Connect-IB, Mellanox devices exposed to the operating system
>> one PCI function per all ports.
>> Starting from Connect-IB, the devices are function-per-port. When
>> passing the second function to a VM, VFIO will fail to start.
>> 
>> Exclude ConnectX-4, ConnectX4-Lx and Connect-IB from the list of
>> Mellanox devices marked as having broken INTx masking:
>> 
>> - ConnectX-4 and ConnectX4-LX firmware version is checked. If INTx
>>   masking is supported, we unmark the broken INTx masking.
>> - Connect-IB does not support INTx currently so will not cause any
>>   problem.
>> 
>> Fixes: 11e42532ada31 ('PCI: Assume all Mellanox devices have ...')
>> Signed-off-by: Noa Osherovich <noaos@mellanox.com>
>> Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
>> --
>> Previous patch version can be found here:
>> https://patchwork.ozlabs.org/patch/612222/
>> ---
>>  drivers/pci/quirks.c    | 111 +++++++++++++++++++++++++++++++++++++++++++++++-
>>  include/linux/pci_ids.h |  16 +++++++
>>  2 files changed, 126 insertions(+), 1 deletion(-)
>> 
>> diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
>> index c232729f5b1b..9e6d6aafc2ab 100644
>> --- a/drivers/pci/quirks.c
>> +++ b/drivers/pci/quirks.c
>> @@ -3158,8 +3158,117 @@ static void quirk_broken_intx_masking(struct pci_dev *dev)
>>   */
>>  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_REALTEK, 0x8169,
>>  			 quirk_broken_intx_masking);
>> -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_ANY_ID,
>> +
>> +#define CONNECTX_4_CURR_MAX_MINOR 99
>> +#define CONNECTX_4_INTX_SUPPORT_MINOR 14
>> +
>> +/*
>> + * Checking ConnectX-4/LX FW version to see if it supports legacy interrupts.
>> + * If so, don't mark it as broken.
>> + * FW minor > 99 means older FW version format and no INTx masking support.
>> + * FW minor < 14 means new FW version format and no INTx masking support.
>> + */
>> +static void quirk_connectx_4_verify_fw(struct pci_dev *dev)
>> +{
>> +	u16 pmcsr;
>> +	u8 __iomem *fw_ver;
>> +	u8 fw_minor;
>> +
>> +	dev->broken_intx_masking = 1;
>> +
>> +	/*
>> +	 * Check that the device is in the D0 power state. If it's not,
>> +	 * there is no point to look any further.
>> +	 */
>> +	if (dev->pm_cap) {
>> +		pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
>> +		if ((pmcsr & PCI_PM_CTRL_STATE_MASK) != PCI_D0)
>> +			return;
>> +	}
>> +
>> +	/* Convert from PCI bus to resource space.  */
>> +	fw_ver = ioremap(pci_resource_start(dev, 0), 2);
>
>This is a little problematic.  This is run as a HEADER quirk, and the
>ioremap() depends on the BAR being initialized.  On x86, the BAR
>likely has been initialized by the BIOS, but that's not true on all
>arches, so we can't rely on it.
>
>I think you probably should make this a FINAL quirk and call
>pci_enable_device_mem() first, then use pci_iomap().  That way all the
>BARs will have been assigned, and it also means you don't need to
>worry about state D0 above, because pci_enable_device_mem() will make
>sure that the device is in D0.
>
>> +	if (!fw_ver) {
>> +		dev_warn(&dev->dev, "Can't map ConnectX-4 initialization segment\n");
>> +		return;
>> +	}
>> +
>> +	fw_minor = readb(fw_ver + 1);
>> +	if (fw_minor > CONNECTX_4_CURR_MAX_MINOR ||
>> +	    fw_minor < CONNECTX_4_INTX_SUPPORT_MINOR)
>> +		dev_warn(&dev->dev, "ConnectX-4: FW doesn't support INTx masking, disabling. Please upgrade FW for INTx support\n");
>
>Can you print the current firmware version and maybe the required one
>here?  I think that might help the user figure out how to fix the
>problem.
>
>> +	else
>> +		dev->broken_intx_masking = 0;
>> +
>> +	iounmap(fw_ver);
>> +}
>> +
>> +/*
>> + * Mellanox devices that fail under PCI device assignment using DisINTx masking
>> + */
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_TAVOR_BRIDGE,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_ARBEL,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_SINAI,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_SINAI_OLD,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_HERMON_SDR,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_HERMON_DDR,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_HERMON_QDR,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_HERMON_EN,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX_EN,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX2,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX3,
>> +			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO,
>>  			 quirk_broken_intx_masking);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX4,
>> +			 quirk_connectx_4_verify_fw);
>> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
>> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX,
>> +			 quirk_connectx_4_verify_fw);
>>  
>>  /*
>>   * Intel i40e (XL710/X710) 10/20/40GbE NICs all have broken INTx masking,
>> diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
>> index c58752fe16c4..1c742842e76c 100644
>> --- a/include/linux/pci_ids.h
>> +++ b/include/linux/pci_ids.h
>> @@ -2262,6 +2262,22 @@
>>  #define PCI_DEVICE_ID_MELLANOX_ARBEL	0x6282
>>  #define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c
>>  #define PCI_DEVICE_ID_MELLANOX_SINAI	0x6274
>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_SDR 0x6340
>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_DDR 0x634a
>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_QDR 0x6354
>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2 0x6732
>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2 0x673c
>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_EN 0x6368
>> +#define PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2 0x6750
>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN 0x6372
>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2 0x675a
>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2 0x6764
>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2 0x6746
>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX2 0x676e
>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX3 0x1003
>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO 0x1007
>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX4 0x1013
>> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX 0x1015
>>  
>>  #define PCI_VENDOR_ID_DFI		0x15bd
>>  
>> -- 
>> 1.8.3.1
>> 
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] PCI: Refine broken INTx masking for Mellanox devices
  2016-11-01 14:47 Noa Osherovich
@ 2016-11-09 18:22 ` Bjorn Helgaas
  2016-11-10  3:46   ` Gavin Shan
  0 siblings, 1 reply; 13+ messages in thread
From: Bjorn Helgaas @ 2016-11-09 18:22 UTC (permalink / raw)
  To: Noa Osherovich
  Cc: bhelgaas, linux-pci, Gavin Shan, Amir Vadai, Majd Dibbiny,
	Alex Williamson

[+cc Gavin, Amir, Majd, Alex]

Hi Noa,

On Tue, Nov 01, 2016 at 04:47:24PM +0200, Noa Osherovich wrote:
> Mellanox devices were marked as having INTx masking ability broken.

This was done by 11e42532ada3 ("PCI: Assume all Mellanox devices have
broken INTx masking"), by Gavin Shan.  Possibly that was a little too
aggressive.  I cc'd Gavin and Amir so they can comment.

This patch is really doing two separate things:

  1) Changing from a blanket "all Mellanox devices" have broken INTx to a
  specific "only these listed Mellanox devices" have broken INTx, and

  2) Checking the firmware version of PCI_DEVICE_ID_MELLANOX_CONNECTX4 and
  PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX to see if INTx masking works.

I'd like these to be split into two patches: one that sets
broken_intx_masking for the list of Mellanox devices (including CONNECTX4),
and a second that adds quirk_connectx_4_verify_fw() and calls it instead of
quirk_broken_intx_masking() for the CONNECTX4 devices.

That way, a hypothetical problem with a new Mellanox device that's not
on the list will bisect to the first patch, where the solution is
obvious, rather than to the combined patch, which would be much less
obvious.

> As a result, the VFIO driver fails to start when more than one device
> function is passed-through to a VM if both have the same INTx pin.
> 
> Prior to Connect-IB, Mellanox devices exposed to the operating system
> one PCI function per all ports.
> Starting from Connect-IB, the devices are function-per-port. When
> passing the second function to a VM, VFIO will fail to start.
> 
> Exclude ConnectX-4, ConnectX4-Lx and Connect-IB from the list of
> Mellanox devices marked as having broken INTx masking:
> 
> - ConnectX-4 and ConnectX4-LX firmware version is checked. If INTx
>   masking is supported, we unmark the broken INTx masking.
> - Connect-IB does not support INTx currently so will not cause any
>   problem.
> 
> Fixes: 11e42532ada31 ('PCI: Assume all Mellanox devices have ...')
> Signed-off-by: Noa Osherovich <noaos@mellanox.com>
> Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
> --
> Previous patch version can be found here:
> https://patchwork.ozlabs.org/patch/612222/
> ---
>  drivers/pci/quirks.c    | 111 +++++++++++++++++++++++++++++++++++++++++++++++-
>  include/linux/pci_ids.h |  16 +++++++
>  2 files changed, 126 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
> index c232729f5b1b..9e6d6aafc2ab 100644
> --- a/drivers/pci/quirks.c
> +++ b/drivers/pci/quirks.c
> @@ -3158,8 +3158,117 @@ static void quirk_broken_intx_masking(struct pci_dev *dev)
>   */
>  DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_REALTEK, 0x8169,
>  			 quirk_broken_intx_masking);
> -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_ANY_ID,
> +
> +#define CONNECTX_4_CURR_MAX_MINOR 99
> +#define CONNECTX_4_INTX_SUPPORT_MINOR 14
> +
> +/*
> + * Checking ConnectX-4/LX FW version to see if it supports legacy interrupts.
> + * If so, don't mark it as broken.
> + * FW minor > 99 means older FW version format and no INTx masking support.
> + * FW minor < 14 means new FW version format and no INTx masking support.
> + */
> +static void quirk_connectx_4_verify_fw(struct pci_dev *dev)
> +{
> +	u16 pmcsr;
> +	u8 __iomem *fw_ver;
> +	u8 fw_minor;
> +
> +	dev->broken_intx_masking = 1;
> +
> +	/*
> +	 * Check that the device is in the D0 power state. If it's not,
> +	 * there is no point to look any further.
> +	 */
> +	if (dev->pm_cap) {
> +		pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
> +		if ((pmcsr & PCI_PM_CTRL_STATE_MASK) != PCI_D0)
> +			return;
> +	}
> +
> +	/* Convert from PCI bus to resource space.  */
> +	fw_ver = ioremap(pci_resource_start(dev, 0), 2);

This is a little problematic.  This is run as a HEADER quirk, and the
ioremap() depends on the BAR being initialized.  On x86, the BAR
likely has been initialized by the BIOS, but that's not true on all
arches, so we can't rely on it.

I think you probably should make this a FINAL quirk and call
pci_enable_device_mem() first, then use pci_iomap().  That way all the
BARs will have been assigned, and it also means you don't need to
worry about state D0 above, because pci_enable_device_mem() will make
sure that the device is in D0.

> +	if (!fw_ver) {
> +		dev_warn(&dev->dev, "Can't map ConnectX-4 initialization segment\n");
> +		return;
> +	}
> +
> +	fw_minor = readb(fw_ver + 1);
> +	if (fw_minor > CONNECTX_4_CURR_MAX_MINOR ||
> +	    fw_minor < CONNECTX_4_INTX_SUPPORT_MINOR)
> +		dev_warn(&dev->dev, "ConnectX-4: FW doesn't support INTx masking, disabling. Please upgrade FW for INTx support\n");

Can you print the current firmware version and maybe the required one
here?  I think that might help the user figure out how to fix the
problem.

> +	else
> +		dev->broken_intx_masking = 0;
> +
> +	iounmap(fw_ver);
> +}
> +
> +/*
> + * Mellanox devices that fail under PCI device assignment using DisINTx masking
> + */
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_TAVOR_BRIDGE,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_ARBEL,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_SINAI,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_SINAI_OLD,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_HERMON_SDR,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_HERMON_DDR,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_HERMON_QDR,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_HERMON_EN,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX_EN,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX2,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX3,
> +			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO,
>  			 quirk_broken_intx_masking);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX4,
> +			 quirk_connectx_4_verify_fw);
> +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
> +			 PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX,
> +			 quirk_connectx_4_verify_fw);
>  
>  /*
>   * Intel i40e (XL710/X710) 10/20/40GbE NICs all have broken INTx masking,
> diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
> index c58752fe16c4..1c742842e76c 100644
> --- a/include/linux/pci_ids.h
> +++ b/include/linux/pci_ids.h
> @@ -2262,6 +2262,22 @@
>  #define PCI_DEVICE_ID_MELLANOX_ARBEL	0x6282
>  #define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c
>  #define PCI_DEVICE_ID_MELLANOX_SINAI	0x6274
> +#define PCI_DEVICE_ID_MELLANOX_HERMON_SDR 0x6340
> +#define PCI_DEVICE_ID_MELLANOX_HERMON_DDR 0x634a
> +#define PCI_DEVICE_ID_MELLANOX_HERMON_QDR 0x6354
> +#define PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2 0x6732
> +#define PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2 0x673c
> +#define PCI_DEVICE_ID_MELLANOX_HERMON_EN 0x6368
> +#define PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2 0x6750
> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN 0x6372
> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2 0x675a
> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2 0x6764
> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2 0x6746
> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX2 0x676e
> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX3 0x1003
> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO 0x1007
> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX4 0x1013
> +#define PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX 0x1015
>  
>  #define PCI_VENDOR_ID_DFI		0x15bd
>  
> -- 
> 1.8.3.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH] PCI: Refine broken INTx masking for Mellanox devices
@ 2016-11-01 14:47 Noa Osherovich
  2016-11-09 18:22 ` Bjorn Helgaas
  0 siblings, 1 reply; 13+ messages in thread
From: Noa Osherovich @ 2016-11-01 14:47 UTC (permalink / raw)
  To: bhelgaas; +Cc: linux-pci, noaos

Mellanox devices were marked as having INTx masking ability broken.
As a result, the VFIO driver fails to start when more than one device
function is passed-through to a VM if both have the same INTx pin.

Prior to Connect-IB, Mellanox devices exposed to the operating system
one PCI function per all ports.
Starting from Connect-IB, the devices are function-per-port. When
passing the second function to a VM, VFIO will fail to start.

Exclude ConnectX-4, ConnectX4-Lx and Connect-IB from the list of
Mellanox devices marked as having broken INTx masking:

- ConnectX-4 and ConnectX4-LX firmware version is checked. If INTx
  masking is supported, we unmark the broken INTx masking.
- Connect-IB does not support INTx currently so will not cause any
  problem.

Fixes: 11e42532ada31 ('PCI: Assume all Mellanox devices have ...')
Signed-off-by: Noa Osherovich <noaos@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
--
Previous patch version can be found here:
https://patchwork.ozlabs.org/patch/612222/
---
 drivers/pci/quirks.c    | 111 +++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/pci_ids.h |  16 +++++++
 2 files changed, 126 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index c232729f5b1b..9e6d6aafc2ab 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3158,8 +3158,117 @@ static void quirk_broken_intx_masking(struct pci_dev *dev)
  */
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_REALTEK, 0x8169,
 			 quirk_broken_intx_masking);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_ANY_ID,
+
+#define CONNECTX_4_CURR_MAX_MINOR 99
+#define CONNECTX_4_INTX_SUPPORT_MINOR 14
+
+/*
+ * Checking ConnectX-4/LX FW version to see if it supports legacy interrupts.
+ * If so, don't mark it as broken.
+ * FW minor > 99 means older FW version format and no INTx masking support.
+ * FW minor < 14 means new FW version format and no INTx masking support.
+ */
+static void quirk_connectx_4_verify_fw(struct pci_dev *dev)
+{
+	u16 pmcsr;
+	u8 __iomem *fw_ver;
+	u8 fw_minor;
+
+	dev->broken_intx_masking = 1;
+
+	/*
+	 * Check that the device is in the D0 power state. If it's not,
+	 * there is no point to look any further.
+	 */
+	if (dev->pm_cap) {
+		pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
+		if ((pmcsr & PCI_PM_CTRL_STATE_MASK) != PCI_D0)
+			return;
+	}
+
+	/* Convert from PCI bus to resource space.  */
+	fw_ver = ioremap(pci_resource_start(dev, 0), 2);
+	if (!fw_ver) {
+		dev_warn(&dev->dev, "Can't map ConnectX-4 initialization segment\n");
+		return;
+	}
+
+	fw_minor = readb(fw_ver + 1);
+	if (fw_minor > CONNECTX_4_CURR_MAX_MINOR ||
+	    fw_minor < CONNECTX_4_INTX_SUPPORT_MINOR)
+		dev_warn(&dev->dev, "ConnectX-4: FW doesn't support INTx masking, disabling. Please upgrade FW for INTx support\n");
+	else
+		dev->broken_intx_masking = 0;
+
+	iounmap(fw_ver);
+}
+
+/*
+ * Mellanox devices that fail under PCI device assignment using DisINTx masking
+ */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_TAVOR_BRIDGE,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_ARBEL,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_SINAI,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_SINAI_OLD,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_HERMON_SDR,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_HERMON_DDR,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_HERMON_QDR,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_HERMON_EN,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_CONNECTX_EN,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_CONNECTX2,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_CONNECTX3,
+			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO,
 			 quirk_broken_intx_masking);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_CONNECTX4,
+			 quirk_connectx_4_verify_fw);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX,
+			 PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX,
+			 quirk_connectx_4_verify_fw);
 
 /*
  * Intel i40e (XL710/X710) 10/20/40GbE NICs all have broken INTx masking,
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index c58752fe16c4..1c742842e76c 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2262,6 +2262,22 @@
 #define PCI_DEVICE_ID_MELLANOX_ARBEL	0x6282
 #define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c
 #define PCI_DEVICE_ID_MELLANOX_SINAI	0x6274
+#define PCI_DEVICE_ID_MELLANOX_HERMON_SDR 0x6340
+#define PCI_DEVICE_ID_MELLANOX_HERMON_DDR 0x634a
+#define PCI_DEVICE_ID_MELLANOX_HERMON_QDR 0x6354
+#define PCI_DEVICE_ID_MELLANOX_HERMON_DDR_GEN2 0x6732
+#define PCI_DEVICE_ID_MELLANOX_HERMON_QDR_GEN2 0x673c
+#define PCI_DEVICE_ID_MELLANOX_HERMON_EN 0x6368
+#define PCI_DEVICE_ID_MELLANOX_HERMON_EN_GEN2 0x6750
+#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN 0x6372
+#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_T_GEN2 0x675a
+#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_GEN2 0x6764
+#define PCI_DEVICE_ID_MELLANOX_CONNECTX_EN_5_GEN2 0x6746
+#define PCI_DEVICE_ID_MELLANOX_CONNECTX2 0x676e
+#define PCI_DEVICE_ID_MELLANOX_CONNECTX3 0x1003
+#define PCI_DEVICE_ID_MELLANOX_CONNECTX3_PRO 0x1007
+#define PCI_DEVICE_ID_MELLANOX_CONNECTX4 0x1013
+#define PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX 0x1015
 
 #define PCI_VENDOR_ID_DFI		0x15bd
 
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2016-11-10 23:22 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-04-19 16:33 [PATCH] PCI: Refine broken INTx masking for Mellanox devices Majd Dibbiny
2016-04-19 19:04 ` Alex Williamson
2016-04-21  7:12   ` Majd Dibbiny
2016-04-25 12:45     ` Majd Dibbiny
2016-04-25 13:55       ` Alex Williamson
     [not found]         ` <5720CFA1.6030200@mellanox.com>
2016-04-28  7:55           ` Majd Dibbiny
2016-11-01 14:47 Noa Osherovich
2016-11-09 18:22 ` Bjorn Helgaas
2016-11-10  3:46   ` Gavin Shan
2016-11-10 15:00     ` Bjorn Helgaas
2016-11-10 15:58       ` Noa Osherovich
2016-11-10 15:56     ` Noa Osherovich
2016-11-10 23:22       ` Gavin Shan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.