All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/1] drm/amdkfd: make needs_pcie_atomics FW-version dependent
@ 2021-08-31 21:56 Felix Kuehling
  2021-08-31 22:09 ` Zeng, Oak
  2021-09-01 11:04 ` Lazar, Lijo
  0 siblings, 2 replies; 9+ messages in thread
From: Felix Kuehling @ 2021-08-31 21:56 UTC (permalink / raw)
  To: amd-gfx

On some GPUs the PCIe atomic requirement for KFD depends on the MEC
firmware version. Add a firmware version check for this. The minimum
firmware version that works without atomics can be updated in the
device_info structure for each GPU type.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 9 +++++++--
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h   | 1 +
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 16a57b70cc1a..655ee5733229 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -688,6 +688,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
 	struct kfd_dev *kfd;
 	const struct kfd_device_info *device_info;
 	const struct kfd2kgd_calls *f2g;
+	uint32_t fw_version;
 
 	if (asic_type >= sizeof(kfd_supported_devices) / (sizeof(void *) * 2)
 		|| asic_type >= sizeof(kfd2kgd_funcs) / sizeof(void *)) {
@@ -713,8 +714,12 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
 	 * supported.
 	 */
 	kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kgd);
-	if (device_info->needs_pci_atomics &&
-	    !kfd->pci_atomic_requested) {
+	fw_version = amdgpu_amdkfd_get_fw_version(kgd, KGD_ENGINE_MEC1);
+	if (!kfd->pci_atomic_requested &&
+	    device_info->needs_pci_atomics &&
+	    (!device_info->no_atomic_fw_version ||
+	      amdgpu_amdkfd_get_fw_version(kgd, KGD_ENGINE_MEC1) <
+			device_info->no_atomic_fw_version)) {
 		dev_info(kfd_device,
 			 "skipped device %x:%x, PCI rejects atomics\n",
 			 pdev->vendor, pdev->device);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index ab83b0de6b22..6d8f9bb2d905 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -207,6 +207,7 @@ struct kfd_device_info {
 	bool supports_cwsr;
 	bool needs_iommu_device;
 	bool needs_pci_atomics;
+	uint32_t no_atomic_fw_version;
 	unsigned int num_sdma_engines;
 	unsigned int num_xgmi_sdma_engines;
 	unsigned int num_sdma_queues_per_engine;
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/1] drm/amdkfd: make needs_pcie_atomics FW-version dependent
  2021-08-31 21:56 [PATCH 1/1] drm/amdkfd: make needs_pcie_atomics FW-version dependent Felix Kuehling
@ 2021-08-31 22:09 ` Zeng, Oak
  2021-08-31 22:22   ` Felix Kuehling
  2021-09-01 11:04 ` Lazar, Lijo
  1 sibling, 1 reply; 9+ messages in thread
From: Zeng, Oak @ 2021-08-31 22:09 UTC (permalink / raw)
  To: Kuehling, Felix, amd-gfx

A nit-pick inline. Otherwise this patch is Reviewed-by: Oak Zeng <Oak.Zeng@amd.com>

Regards,
Oak 

 

On 2021-08-31, 5:57 PM, "amd-gfx on behalf of Felix Kuehling" <amd-gfx-bounces@lists.freedesktop.org on behalf of Felix.Kuehling@amd.com> wrote:

    On some GPUs the PCIe atomic requirement for KFD depends on the MEC
    firmware version. Add a firmware version check for this. The minimum
    firmware version that works without atomics can be updated in the
    device_info structure for each GPU type.

    Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
    ---
     drivers/gpu/drm/amd/amdkfd/kfd_device.c | 9 +++++++--
     drivers/gpu/drm/amd/amdkfd/kfd_priv.h   | 1 +
     2 files changed, 8 insertions(+), 2 deletions(-)

    diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
    index 16a57b70cc1a..655ee5733229 100644
    --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
    +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
    @@ -688,6 +688,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
     	struct kfd_dev *kfd;
     	const struct kfd_device_info *device_info;
     	const struct kfd2kgd_calls *f2g;
    +	uint32_t fw_version;

     	if (asic_type >= sizeof(kfd_supported_devices) / (sizeof(void *) * 2)
     		|| asic_type >= sizeof(kfd2kgd_funcs) / sizeof(void *)) {
    @@ -713,8 +714,12 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
     	 * supported.
     	 */
     	kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kgd);
    -	if (device_info->needs_pci_atomics &&
    -	    !kfd->pci_atomic_requested) {
    +	fw_version = amdgpu_amdkfd_get_fw_version(kgd, KGD_ENGINE_MEC1);
    +	if (!kfd->pci_atomic_requested &&
    +	    device_info->needs_pci_atomics &&
    +	    (!device_info->no_atomic_fw_version ||
    +	      amdgpu_amdkfd_get_fw_version(kgd, KGD_ENGINE_MEC1) <
You already get the fw_version above.
    +			device_info->no_atomic_fw_version)) {
     		dev_info(kfd_device,
     			 "skipped device %x:%x, PCI rejects atomics\n",
     			 pdev->vendor, pdev->device);
    diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
    index ab83b0de6b22..6d8f9bb2d905 100644
    --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
    +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
    @@ -207,6 +207,7 @@ struct kfd_device_info {
     	bool supports_cwsr;
     	bool needs_iommu_device;
     	bool needs_pci_atomics;
    +	uint32_t no_atomic_fw_version;
     	unsigned int num_sdma_engines;
     	unsigned int num_xgmi_sdma_engines;
     	unsigned int num_sdma_queues_per_engine;
    -- 
    2.32.0



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/1] drm/amdkfd: make needs_pcie_atomics FW-version dependent
  2021-08-31 22:09 ` Zeng, Oak
@ 2021-08-31 22:22   ` Felix Kuehling
  0 siblings, 0 replies; 9+ messages in thread
From: Felix Kuehling @ 2021-08-31 22:22 UTC (permalink / raw)
  To: Zeng, Oak, amd-gfx

On 2021-08-31 6:09 p.m., Zeng, Oak wrote:
> A nit-pick inline. Otherwise this patch is Reviewed-by: Oak Zeng <Oak.Zeng@amd.com>
>
> Regards,
> Oak
>
>   
>
> On 2021-08-31, 5:57 PM, "amd-gfx on behalf of Felix Kuehling" <amd-gfx-bounces@lists.freedesktop.org on behalf of Felix.Kuehling@amd.com> wrote:
>
>      On some GPUs the PCIe atomic requirement for KFD depends on the MEC
>      firmware version. Add a firmware version check for this. The minimum
>      firmware version that works without atomics can be updated in the
>      device_info structure for each GPU type.
>
>      Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>      ---
>       drivers/gpu/drm/amd/amdkfd/kfd_device.c | 9 +++++++--
>       drivers/gpu/drm/amd/amdkfd/kfd_priv.h   | 1 +
>       2 files changed, 8 insertions(+), 2 deletions(-)
>
>      diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>      index 16a57b70cc1a..655ee5733229 100644
>      --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>      +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>      @@ -688,6 +688,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
>       	struct kfd_dev *kfd;
>       	const struct kfd_device_info *device_info;
>       	const struct kfd2kgd_calls *f2g;
>      +	uint32_t fw_version;
>
>       	if (asic_type >= sizeof(kfd_supported_devices) / (sizeof(void *) * 2)
>       		|| asic_type >= sizeof(kfd2kgd_funcs) / sizeof(void *)) {
>      @@ -713,8 +714,12 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
>       	 * supported.
>       	 */
>       	kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kgd);
>      -	if (device_info->needs_pci_atomics &&
>      -	    !kfd->pci_atomic_requested) {
>      +	fw_version = amdgpu_amdkfd_get_fw_version(kgd, KGD_ENGINE_MEC1);
>      +	if (!kfd->pci_atomic_requested &&
>      +	    device_info->needs_pci_atomics &&
>      +	    (!device_info->no_atomic_fw_version ||
>      +	      amdgpu_amdkfd_get_fw_version(kgd, KGD_ENGINE_MEC1) <
> You already get the fw_version above.

I'll fix that. I forgot to remove the local variable after I decided to 
move the function call into the condition.

Thanks,
   Felix


>      +			device_info->no_atomic_fw_version)) {
>       		dev_info(kfd_device,
>       			 "skipped device %x:%x, PCI rejects atomics\n",
>       			 pdev->vendor, pdev->device);
>      diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>      index ab83b0de6b22..6d8f9bb2d905 100644
>      --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>      +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>      @@ -207,6 +207,7 @@ struct kfd_device_info {
>       	bool supports_cwsr;
>       	bool needs_iommu_device;
>       	bool needs_pci_atomics;
>      +	uint32_t no_atomic_fw_version;
>       	unsigned int num_sdma_engines;
>       	unsigned int num_xgmi_sdma_engines;
>       	unsigned int num_sdma_queues_per_engine;
>      --
>      2.32.0
>
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/1] drm/amdkfd: make needs_pcie_atomics FW-version dependent
  2021-08-31 21:56 [PATCH 1/1] drm/amdkfd: make needs_pcie_atomics FW-version dependent Felix Kuehling
  2021-08-31 22:09 ` Zeng, Oak
@ 2021-09-01 11:04 ` Lazar, Lijo
  2021-09-01 14:54   ` Felix Kuehling
  1 sibling, 1 reply; 9+ messages in thread
From: Lazar, Lijo @ 2021-09-01 11:04 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx



On 9/1/2021 3:26 AM, Felix Kuehling wrote:
> On some GPUs the PCIe atomic requirement for KFD depends on the MEC
> firmware version. Add a firmware version check for this. The minimum
> firmware version that works without atomics can be updated in the
> device_info structure for each GPU type.
> 
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_device.c | 9 +++++++--
>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h   | 1 +
>   2 files changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 16a57b70cc1a..655ee5733229 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -688,6 +688,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
>   	struct kfd_dev *kfd;
>   	const struct kfd_device_info *device_info;
>   	const struct kfd2kgd_calls *f2g;
> +	uint32_t fw_version;
>   
>   	if (asic_type >= sizeof(kfd_supported_devices) / (sizeof(void *) * 2)
>   		|| asic_type >= sizeof(kfd2kgd_funcs) / sizeof(void *)) {
> @@ -713,8 +714,12 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
>   	 * supported.
>   	 */
>   	kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kgd);

Should the check be grouped inside amdgpu_amdkfd_have_atomics_support?

This flag is used for setting some link properties. If there is HW 
support but comes with incompatible firmware, should the link be still 
marked as atomic?

Thanks,
Lijo

> -	if (device_info->needs_pci_atomics &&
> -	    !kfd->pci_atomic_requested) {
> +	fw_version = amdgpu_amdkfd_get_fw_version(kgd, KGD_ENGINE_MEC1);
> +	if (!kfd->pci_atomic_requested &&
> +	    device_info->needs_pci_atomics &&
> +	    (!device_info->no_atomic_fw_version ||
> +	      amdgpu_amdkfd_get_fw_version(kgd, KGD_ENGINE_MEC1) <
> +			device_info->no_atomic_fw_version)) {
>   		dev_info(kfd_device,
>   			 "skipped device %x:%x, PCI rejects atomics\n",
>   			 pdev->vendor, pdev->device);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index ab83b0de6b22..6d8f9bb2d905 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -207,6 +207,7 @@ struct kfd_device_info {
>   	bool supports_cwsr;
>   	bool needs_iommu_device;
>   	bool needs_pci_atomics;
> +	uint32_t no_atomic_fw_version;
>   	unsigned int num_sdma_engines;
>   	unsigned int num_xgmi_sdma_engines;
>   	unsigned int num_sdma_queues_per_engine;
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/1] drm/amdkfd: make needs_pcie_atomics FW-version dependent
  2021-09-01 11:04 ` Lazar, Lijo
@ 2021-09-01 14:54   ` Felix Kuehling
  2021-09-01 16:30     ` Lazar, Lijo
  0 siblings, 1 reply; 9+ messages in thread
From: Felix Kuehling @ 2021-09-01 14:54 UTC (permalink / raw)
  To: Lazar, Lijo, amd-gfx

Am 2021-09-01 um 7:04 a.m. schrieb Lazar, Lijo:
>
>
> On 9/1/2021 3:26 AM, Felix Kuehling wrote:
>> On some GPUs the PCIe atomic requirement for KFD depends on the MEC
>> firmware version. Add a firmware version check for this. The minimum
>> firmware version that works without atomics can be updated in the
>> device_info structure for each GPU type.
>>
>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdkfd/kfd_device.c | 9 +++++++--
>>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h   | 1 +
>>   2 files changed, 8 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> index 16a57b70cc1a..655ee5733229 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> @@ -688,6 +688,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
>>       struct kfd_dev *kfd;
>>       const struct kfd_device_info *device_info;
>>       const struct kfd2kgd_calls *f2g;
>> +    uint32_t fw_version;
>>         if (asic_type >= sizeof(kfd_supported_devices) / (sizeof(void
>> *) * 2)
>>           || asic_type >= sizeof(kfd2kgd_funcs) / sizeof(void *)) {
>> @@ -713,8 +714,12 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
>>        * supported.
>>        */
>>       kfd->pci_atomic_requested =
>> amdgpu_amdkfd_have_atomics_support(kgd);
>
> Should the check be grouped inside amdgpu_amdkfd_have_atomics_support?
>
> This flag is used for setting some link properties. If there is HW
> support but comes with incompatible firmware, should the link be still
> marked as atomic?

Our GPU HW always supports PCIe atomics (it's part of the PCIe 3 spec).
But some mainboards with older PCIe chipsets do not. Sometimes even
different ports on the same mainboard differ in their PCIe version and
atomic support.

amdgpu_device_init always tries to enable atomics on the root port and
all the bridges leading to the GPU by calling
pci_enable_atomic_ops_to_root. The result is saved in
adev->have_atomics_support, which is returned to KFD by
amdgpu_amdkfd_have_atomics_support.

The firmware change here does not affect whether atomics are
_supported_. It changes whether atomics are _required_ for the basic
operation of AQL user mode queues. The coming firmware update will
remove that requirement, which allows us to enable KFD for these GPUs+FW
on systems without PCIe atomics.

Enabling PCIe atomics with the updated FW is still beneficial because
shader programs can use a subset of atomic instructions for accessing
system memory atomically on supported systems.

Regards,
  Felix


>
> Thanks,
> Lijo
>
>> -    if (device_info->needs_pci_atomics &&
>> -        !kfd->pci_atomic_requested) {
>> +    fw_version = amdgpu_amdkfd_get_fw_version(kgd, KGD_ENGINE_MEC1);
>> +    if (!kfd->pci_atomic_requested &&
>> +        device_info->needs_pci_atomics &&
>> +        (!device_info->no_atomic_fw_version ||
>> +          amdgpu_amdkfd_get_fw_version(kgd, KGD_ENGINE_MEC1) <
>> +            device_info->no_atomic_fw_version)) {
>>           dev_info(kfd_device,
>>                "skipped device %x:%x, PCI rejects atomics\n",
>>                pdev->vendor, pdev->device);
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> index ab83b0de6b22..6d8f9bb2d905 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> @@ -207,6 +207,7 @@ struct kfd_device_info {
>>       bool supports_cwsr;
>>       bool needs_iommu_device;
>>       bool needs_pci_atomics;
>> +    uint32_t no_atomic_fw_version;
>>       unsigned int num_sdma_engines;
>>       unsigned int num_xgmi_sdma_engines;
>>       unsigned int num_sdma_queues_per_engine;
>>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/1] drm/amdkfd: make needs_pcie_atomics FW-version dependent
  2021-09-01 14:54   ` Felix Kuehling
@ 2021-09-01 16:30     ` Lazar, Lijo
  2021-09-01 16:37       ` Alex Deucher
  2021-09-01 16:47       ` Felix Kuehling
  0 siblings, 2 replies; 9+ messages in thread
From: Lazar, Lijo @ 2021-09-01 16:30 UTC (permalink / raw)
  To: Kuehling, Felix, amd-gfx

[-- Attachment #1: Type: text/plain, Size: 4654 bytes --]

[Public]

What I wanted to ask was -

Do user mode applications rely on link properties alone to assume atomic ops are supported? If they check only link properties and the firmware doesn't work properly, should it still be marked as supported?

Basically, what is the purpose of exposing atomic capability in link properties, and can that be utilised by upper mode applications just based on PCIe atomics support?

Thanks,
Lijo
________________________________
From: Kuehling, Felix <Felix.Kuehling@amd.com>
Sent: Wednesday, September 1, 2021 8:24:56 PM
To: Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH 1/1] drm/amdkfd: make needs_pcie_atomics FW-version dependent

Am 2021-09-01 um 7:04 a.m. schrieb Lazar, Lijo:
>
>
> On 9/1/2021 3:26 AM, Felix Kuehling wrote:
>> On some GPUs the PCIe atomic requirement for KFD depends on the MEC
>> firmware version. Add a firmware version check for this. The minimum
>> firmware version that works without atomics can be updated in the
>> device_info structure for each GPU type.
>>
>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdkfd/kfd_device.c | 9 +++++++--
>>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h   | 1 +
>>   2 files changed, 8 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> index 16a57b70cc1a..655ee5733229 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> @@ -688,6 +688,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
>>       struct kfd_dev *kfd;
>>       const struct kfd_device_info *device_info;
>>       const struct kfd2kgd_calls *f2g;
>> +    uint32_t fw_version;
>>         if (asic_type >= sizeof(kfd_supported_devices) / (sizeof(void
>> *) * 2)
>>           || asic_type >= sizeof(kfd2kgd_funcs) / sizeof(void *)) {
>> @@ -713,8 +714,12 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
>>        * supported.
>>        */
>>       kfd->pci_atomic_requested =
>> amdgpu_amdkfd_have_atomics_support(kgd);
>
> Should the check be grouped inside amdgpu_amdkfd_have_atomics_support?
>
> This flag is used for setting some link properties. If there is HW
> support but comes with incompatible firmware, should the link be still
> marked as atomic?

Our GPU HW always supports PCIe atomics (it's part of the PCIe 3 spec).
But some mainboards with older PCIe chipsets do not. Sometimes even
different ports on the same mainboard differ in their PCIe version and
atomic support.

amdgpu_device_init always tries to enable atomics on the root port and
all the bridges leading to the GPU by calling
pci_enable_atomic_ops_to_root. The result is saved in
adev->have_atomics_support, which is returned to KFD by
amdgpu_amdkfd_have_atomics_support.

The firmware change here does not affect whether atomics are
_supported_. It changes whether atomics are _required_ for the basic
operation of AQL user mode queues. The coming firmware update will
remove that requirement, which allows us to enable KFD for these GPUs+FW
on systems without PCIe atomics.

Enabling PCIe atomics with the updated FW is still beneficial because
shader programs can use a subset of atomic instructions for accessing
system memory atomically on supported systems.

Regards,
  Felix


>
> Thanks,
> Lijo
>
>> -    if (device_info->needs_pci_atomics &&
>> -        !kfd->pci_atomic_requested) {
>> +    fw_version = amdgpu_amdkfd_get_fw_version(kgd, KGD_ENGINE_MEC1);
>> +    if (!kfd->pci_atomic_requested &&
>> +        device_info->needs_pci_atomics &&
>> +        (!device_info->no_atomic_fw_version ||
>> +          amdgpu_amdkfd_get_fw_version(kgd, KGD_ENGINE_MEC1) <
>> +            device_info->no_atomic_fw_version)) {
>>           dev_info(kfd_device,
>>                "skipped device %x:%x, PCI rejects atomics\n",
>>                pdev->vendor, pdev->device);
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> index ab83b0de6b22..6d8f9bb2d905 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> @@ -207,6 +207,7 @@ struct kfd_device_info {
>>       bool supports_cwsr;
>>       bool needs_iommu_device;
>>       bool needs_pci_atomics;
>> +    uint32_t no_atomic_fw_version;
>>       unsigned int num_sdma_engines;
>>       unsigned int num_xgmi_sdma_engines;
>>       unsigned int num_sdma_queues_per_engine;
>>

[-- Attachment #2: Type: text/html, Size: 7644 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/1] drm/amdkfd: make needs_pcie_atomics FW-version dependent
  2021-09-01 16:30     ` Lazar, Lijo
@ 2021-09-01 16:37       ` Alex Deucher
  2021-09-01 16:47       ` Felix Kuehling
  1 sibling, 0 replies; 9+ messages in thread
From: Alex Deucher @ 2021-09-01 16:37 UTC (permalink / raw)
  To: Lazar, Lijo; +Cc: Kuehling, Felix, amd-gfx

On Wed, Sep 1, 2021 at 12:30 PM Lazar, Lijo <Lijo.Lazar@amd.com> wrote:
>
> [Public]
>
>
> What I wanted to ask was -
>
> Whether user mode application relies only on link properties alone to assume atomic ops are supported? If they check only link properties and if the firmware doesn't work fine, should it be still marked as supported?
>
> Basically, what is the purpose of exposing atomic capability in link properties and whether that can be utilised by upper mode applications just based on PCIe atomics support?
>

PCI atomics in general and the requirement for PCI atomics in the CP
firmware are independent.  The firmware can operate either with
atomics or without.  The operation of the firmware does not affect
user processes that might want to use atomics for other things.

Alex


> Thanks,
> Lijo
> ________________________________
> From: Kuehling, Felix <Felix.Kuehling@amd.com>
> Sent: Wednesday, September 1, 2021 8:24:56 PM
> To: Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
> Subject: Re: [PATCH 1/1] drm/amdkfd: make needs_pcie_atomics FW-version dependent
>
> Am 2021-09-01 um 7:04 a.m. schrieb Lazar, Lijo:
> >
> >
> > On 9/1/2021 3:26 AM, Felix Kuehling wrote:
> >> On some GPUs the PCIe atomic requirement for KFD depends on the MEC
> >> firmware version. Add a firmware version check for this. The minimum
> >> firmware version that works without atomics can be updated in the
> >> device_info structure for each GPU type.
> >>
> >> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> >> ---
> >>   drivers/gpu/drm/amd/amdkfd/kfd_device.c | 9 +++++++--
> >>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h   | 1 +
> >>   2 files changed, 8 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> >> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> >> index 16a57b70cc1a..655ee5733229 100644
> >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> >> @@ -688,6 +688,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
> >>       struct kfd_dev *kfd;
> >>       const struct kfd_device_info *device_info;
> >>       const struct kfd2kgd_calls *f2g;
> >> +    uint32_t fw_version;
> >>         if (asic_type >= sizeof(kfd_supported_devices) / (sizeof(void
> >> *) * 2)
> >>           || asic_type >= sizeof(kfd2kgd_funcs) / sizeof(void *)) {
> >> @@ -713,8 +714,12 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
> >>        * supported.
> >>        */
> >>       kfd->pci_atomic_requested =
> >> amdgpu_amdkfd_have_atomics_support(kgd);
> >
> > Should the check be grouped inside amdgpu_amdkfd_have_atomics_support?
> >
> > This flag is used for setting some link properties. If there is HW
> > support but comes with incompatible firmware, should the link be still
> > marked as atomic?
>
> Our GPU HW always supports PCIe atomics (it's part of the PCIe 3 spec).
> But some mainboards with older PCIe chipsets do not. Sometimes even
> different ports on the same mainboard differ in their PCIe version and
> atomic support.
>
> amdgpu_device_init always tries to enable atomics on the root port and
> all the bridges leading to the GPU by calling
> pci_enable_atomic_ops_to_root. The result is saved in
> adev->have_atomics_support, which is returned to KFD by
> amdgpu_amdkfd_have_atomics_support.
>
> The firmware change here does not affect whether atomics are
> _supported_. It changes whether atomics are _required_ for the basic
> operation of AQL user mode queues. The coming firmware update will
> remove that requirement, which allows us to enable KFD for these GPUs+FW
> on systems without PCIe atomics.
>
> Enabling PCIe atomics with the updated FW is still beneficial because
> shader programs can use a subset of atomic instructions for accessing
> system memory atomically on supported systems.
>
> Regards,
>   Felix
>
>
> >
> > Thanks,
> > Lijo
> >
> >> -    if (device_info->needs_pci_atomics &&
> >> -        !kfd->pci_atomic_requested) {
> >> +    fw_version = amdgpu_amdkfd_get_fw_version(kgd, KGD_ENGINE_MEC1);
> >> +    if (!kfd->pci_atomic_requested &&
> >> +        device_info->needs_pci_atomics &&
> >> +        (!device_info->no_atomic_fw_version ||
> >> +          amdgpu_amdkfd_get_fw_version(kgd, KGD_ENGINE_MEC1) <
> >> +            device_info->no_atomic_fw_version)) {
> >>           dev_info(kfd_device,
> >>                "skipped device %x:%x, PCI rejects atomics\n",
> >>                pdev->vendor, pdev->device);
> >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> >> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> >> index ab83b0de6b22..6d8f9bb2d905 100644
> >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> >> @@ -207,6 +207,7 @@ struct kfd_device_info {
> >>       bool supports_cwsr;
> >>       bool needs_iommu_device;
> >>       bool needs_pci_atomics;
> >> +    uint32_t no_atomic_fw_version;
> >>       unsigned int num_sdma_engines;
> >>       unsigned int num_xgmi_sdma_engines;
> >>       unsigned int num_sdma_queues_per_engine;
> >>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/1] drm/amdkfd: make needs_pcie_atomics FW-version dependent
  2021-09-01 16:30     ` Lazar, Lijo
  2021-09-01 16:37       ` Alex Deucher
@ 2021-09-01 16:47       ` Felix Kuehling
  2021-09-02  3:54         ` Lazar, Lijo
  1 sibling, 1 reply; 9+ messages in thread
From: Felix Kuehling @ 2021-09-01 16:47 UTC (permalink / raw)
  To: Lazar, Lijo, amd-gfx

Am 2021-09-01 um 12:30 p.m. schrieb Lazar, Lijo:
>
> [Public]
>
>
> What I wanted to ask was -
>
> Whether user mode application relies only on link properties alone to
> assume atomic ops are supported? If they check only link properties
> and if the firmware doesn't work fine, should it be still marked as
> supported?

Let's be clear what "firmware doesn't work fine" means in this context.
It means "firmware requires PCIe atomics". If firmware requires PCIe
atomics and the system doesn't support PCIe atomics, KFD will not use
the GPU and will not report the GPU to user mode.

If firmware does not require PCIe atomics, or if PCIe atomics work on
the system, KFD will use the GPU and will report the atomic capability
to user mode in the IO link attribute.


>
> Basically, what is the purpose of exposing atomic capability in link
> properties and whether that can be utilised by upper mode applications
> just based on PCIe atomics support?

Applications can use PCIe atomics by using atomic shader instructions
when accessing system memory in GPU shader code. If the system doesn't
support PCIe atomics, these atomic operations are silently dropped.
Therefore the application must check the atomic capability in the IO
link properties before relying on these instructions for system memory.

Regards,
  Felix


>
> Thanks,
> Lijo
> ------------------------------------------------------------------------
> *From:* Kuehling, Felix <Felix.Kuehling@amd.com>
> *Sent:* Wednesday, September 1, 2021 8:24:56 PM
> *To:* Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org
> <amd-gfx@lists.freedesktop.org>
> *Subject:* Re: [PATCH 1/1] drm/amdkfd: make needs_pcie_atomics
> FW-version dependent
>  
> Am 2021-09-01 um 7:04 a.m. schrieb Lazar, Lijo:
> >
> >
> > On 9/1/2021 3:26 AM, Felix Kuehling wrote:
> >> On some GPUs the PCIe atomic requirement for KFD depends on the MEC
> >> firmware version. Add a firmware version check for this. The minimum
> >> firmware version that works without atomics can be updated in the
> >> device_info structure for each GPU type.
> >>
> >> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> >> ---
> >>   drivers/gpu/drm/amd/amdkfd/kfd_device.c | 9 +++++++--
> >>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h   | 1 +
> >>   2 files changed, 8 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> >> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> >> index 16a57b70cc1a..655ee5733229 100644
> >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> >> @@ -688,6 +688,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
> >>       struct kfd_dev *kfd;
> >>       const struct kfd_device_info *device_info;
> >>       const struct kfd2kgd_calls *f2g;
> >> +    uint32_t fw_version;
> >>         if (asic_type >= sizeof(kfd_supported_devices) / (sizeof(void
> >> *) * 2)
> >>           || asic_type >= sizeof(kfd2kgd_funcs) / sizeof(void *)) {
> >> @@ -713,8 +714,12 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
> >>        * supported.
> >>        */
> >>       kfd->pci_atomic_requested =
> >> amdgpu_amdkfd_have_atomics_support(kgd);
> >
> > Should the check be grouped inside amdgpu_amdkfd_have_atomics_support?
> >
> > This flag is used for setting some link properties. If there is HW
> > support but comes with incompatible firmware, should the link be still
> > marked as atomic?
>
> Our GPU HW always supports PCIe atomics (it's part of the PCIe 3 spec).
> But some mainboards with older PCIe chipsets do not. Sometimes even
> different ports on the same mainboard differ in their PCIe version and
> atomic support.
>
> amdgpu_device_init always tries to enable atomics on the root port and
> all the bridges leading to the GPU by calling
> pci_enable_atomic_ops_to_root. The result is saved in
> adev->have_atomics_support, which is returned to KFD by
> amdgpu_amdkfd_have_atomics_support.
>
> The firmware change here does not affect whether atomics are
> _supported_. It changes whether atomics are _required_ for the basic
> operation of AQL user mode queues. The coming firmware update will
> remove that requirement, which allows us to enable KFD for these GPUs+FW
> on systems without PCIe atomics.
>
> Enabling PCIe atomics with the updated FW is still beneficial because
> shader programs can use a subset of atomic instructions for accessing
> system memory atomically on supported systems.
>
> Regards,
>   Felix
>
>
> >
> > Thanks,
> > Lijo
> >
> >> -    if (device_info->needs_pci_atomics &&
> >> -        !kfd->pci_atomic_requested) {
> >> +    fw_version = amdgpu_amdkfd_get_fw_version(kgd, KGD_ENGINE_MEC1);
> >> +    if (!kfd->pci_atomic_requested &&
> >> +        device_info->needs_pci_atomics &&
> >> +        (!device_info->no_atomic_fw_version ||
> >> +          amdgpu_amdkfd_get_fw_version(kgd, KGD_ENGINE_MEC1) <
> >> +            device_info->no_atomic_fw_version)) {
> >>           dev_info(kfd_device,
> >>                "skipped device %x:%x, PCI rejects atomics\n",
> >>                pdev->vendor, pdev->device);
> >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> >> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> >> index ab83b0de6b22..6d8f9bb2d905 100644
> >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> >> @@ -207,6 +207,7 @@ struct kfd_device_info {
> >>       bool supports_cwsr;
> >>       bool needs_iommu_device;
> >>       bool needs_pci_atomics;
> >> +    uint32_t no_atomic_fw_version;
> >>       unsigned int num_sdma_engines;
> >>       unsigned int num_xgmi_sdma_engines;
> >>       unsigned int num_sdma_queues_per_engine;
> >>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/1] drm/amdkfd: make needs_pcie_atomics FW-version dependent
  2021-09-01 16:47       ` Felix Kuehling
@ 2021-09-02  3:54         ` Lazar, Lijo
  0 siblings, 0 replies; 9+ messages in thread
From: Lazar, Lijo @ 2021-09-02  3:54 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx

Thanks Felix for the detailed explanation.

Thanks,
Lijo

On 9/1/2021 10:17 PM, Felix Kuehling wrote:
> Am 2021-09-01 um 12:30 p.m. schrieb Lazar, Lijo:
>>
>> [Public]
>>
>>
>> What I wanted to ask was -
>>
>> Do user mode applications rely on link properties alone to
>> assume atomic ops are supported? If they check only link properties
>> and the firmware doesn't work fine, should it still be marked as
>> supported?
> 
> Let's be clear what "firmware doesn't work fine" means in this context.
> It means "firmware requires PCIe atomics". If firmware requires PCIe
> atomics and the system doesn't support PCIe atomics, KFD will not use
> the GPU and will not report the GPU to user mode.
> 
> If firmware does not require PCIe atomics, or if PCIe atomics work on
> the system, KFD will use the GPU and will report the atomic capability
> to user mode in the IO link attribute.
> 
> 
>>
>> Basically, what is the purpose of exposing atomic capability in link
>> properties and whether that can be utilised by upper mode applications
>> just based on PCIe atomics support?
> 
> Applications can use PCIe atomics by using atomic shader instructions
> when accessing system memory in GPU shader code. If the system doesn't
> support PCIe atomics, these atomic operations are silently dropped.
> Therefore the application must check the atomic capability in the IO
> link properties before relying on these instructions for system memory.
> 
> Regards,
>    Felix
> 
> 
>>
>> Thanks,
>> Lijo
>> ------------------------------------------------------------------------
>> *From:* Kuehling, Felix <Felix.Kuehling@amd.com>
>> *Sent:* Wednesday, September 1, 2021 8:24:56 PM
>> *To:* Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org
>> <amd-gfx@lists.freedesktop.org>
>> *Subject:* Re: [PATCH 1/1] drm/amdkfd: make needs_pcie_atomics
>> FW-version dependent
>>   
>> Am 2021-09-01 um 7:04 a.m. schrieb Lazar, Lijo:
>>>
>>>
>>> On 9/1/2021 3:26 AM, Felix Kuehling wrote:
>>>> On some GPUs the PCIe atomic requirement for KFD depends on the MEC
>>>> firmware version. Add a firmware version check for this. The minimum
>>>> firmware version that works without atomics can be updated in the
>>>> device_info structure for each GPU type.
>>>>
>>>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>>> ---
>>>>    drivers/gpu/drm/amd/amdkfd/kfd_device.c | 9 +++++++--
>>>>    drivers/gpu/drm/amd/amdkfd/kfd_priv.h   | 1 +
>>>>    2 files changed, 8 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>> index 16a57b70cc1a..655ee5733229 100644
>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>> @@ -688,6 +688,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
>>>>        struct kfd_dev *kfd;
>>>>        const struct kfd_device_info *device_info;
>>>>        const struct kfd2kgd_calls *f2g;
>>>> +    uint32_t fw_version;
>>>>          if (asic_type >= sizeof(kfd_supported_devices) / (sizeof(void
>>>> *) * 2)
>>>>            || asic_type >= sizeof(kfd2kgd_funcs) / sizeof(void *)) {
>>>> @@ -713,8 +714,12 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
>>>>         * supported.
>>>>         */
>>>>        kfd->pci_atomic_requested =
>>>> amdgpu_amdkfd_have_atomics_support(kgd);
>>>
>>> Should the check be grouped inside amdgpu_amdkfd_have_atomics_support?
>>>
>>> This flag is used for setting some link properties. If there is HW
>>> support but it comes with incompatible firmware, should the link
>>> still be marked as atomic?
>>
>> Our GPU HW always supports PCIe atomics (it's part of the PCIe 3 spec).
>> But some mainboards with older PCIe chipsets do not. Sometimes even
>> different ports on the same mainboard differ in their PCIe version and
>> atomic support.
>>
>> amdgpu_device_init always tries to enable atomics on the root port and
>> all the bridges leading to the GPU by calling
>> pci_enable_atomic_ops_to_root. The result is saved in
>> adev->have_atomics_support, which is returned to KFD by
>> amdgpu_amdkfd_have_atomics_support.
>>
>> The firmware change here does not affect whether atomics are
>> _supported_. It changes whether atomics are _required_ for the basic
>> operation of AQL user mode queues. The coming firmware update will
>> remove that requirement, which allows us to enable KFD for these GPUs+FW
>> on systems without PCIe atomics.
>>
>> Enabling PCIe atomics with the updated FW is still beneficial because
>> shader programs can use a subset of atomic instructions for accessing
>> system memory atomically on supported systems.
>>
>> Regards,
>>    Felix
>>
>>
>>>
>>> Thanks,
>>> Lijo
>>>
>>>> -    if (device_info->needs_pci_atomics &&
>>>> -        !kfd->pci_atomic_requested) {
>>>> +    fw_version = amdgpu_amdkfd_get_fw_version(kgd, KGD_ENGINE_MEC1);
>>>> +    if (!kfd->pci_atomic_requested &&
>>>> +        device_info->needs_pci_atomics &&
>>>> +        (!device_info->no_atomic_fw_version ||
>>>> +          amdgpu_amdkfd_get_fw_version(kgd, KGD_ENGINE_MEC1) <
>>>> +            device_info->no_atomic_fw_version)) {
>>>>            dev_info(kfd_device,
>>>>                 "skipped device %x:%x, PCI rejects atomics\n",
>>>>                 pdev->vendor, pdev->device);
>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>>> index ab83b0de6b22..6d8f9bb2d905 100644
>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>>> @@ -207,6 +207,7 @@ struct kfd_device_info {
>>>>        bool supports_cwsr;
>>>>        bool needs_iommu_device;
>>>>        bool needs_pci_atomics;
>>>> +    uint32_t no_atomic_fw_version;
>>>>        unsigned int num_sdma_engines;
>>>>        unsigned int num_xgmi_sdma_engines;
>>>>        unsigned int num_sdma_queues_per_engine;
>>>>

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2021-09-02  3:54 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-31 21:56 [PATCH 1/1] drm/amdkfd: make needs_pcie_atomics FW-version dependent Felix Kuehling
2021-08-31 22:09 ` Zeng, Oak
2021-08-31 22:22   ` Felix Kuehling
2021-09-01 11:04 ` Lazar, Lijo
2021-09-01 14:54   ` Felix Kuehling
2021-09-01 16:30     ` Lazar, Lijo
2021-09-01 16:37       ` Alex Deucher
2021-09-01 16:47       ` Felix Kuehling
2021-09-02  3:54         ` Lazar, Lijo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.