All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology
@ 2021-04-28 15:11 Eric Huang
  2021-04-30 14:06 ` Eric Huang
                   ` (3 more replies)
  0 siblings, 4 replies; 18+ messages in thread
From: Eric Huang @ 2021-04-28 15:11 UTC (permalink / raw)
  To: amd-gfx; +Cc: Eric Huang

In NPS4 BIOS we need to find the closest numa node when creating
topology io link between cpu and gpu, if PCI driver doesn't set
it.

Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 94 ++++++++++++++++++++++++++-
 1 file changed, 91 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 38d45711675f..57518136c7d7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1759,6 +1759,87 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
 	return 0;
 }
 
+#ifdef CONFIG_ACPI
+static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev,
+		int *numa_node)
+{
+	struct acpi_table_header *table_header = NULL;
+	struct acpi_subtable_header *sub_header = NULL;
+	unsigned long table_end, subtable_len;
+	u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
+			pci_dev_id(kdev->pdev);
+	u32 bdf;
+	acpi_status status;
+	struct acpi_srat_cpu_affinity *cpu;
+	struct acpi_srat_generic_affinity *gpu;
+	int pxm = 0, max_pxm = 0;
+	bool found = false;
+
+	/* Fetch the SRAT table from ACPI */
+	status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
+	if (status == AE_NOT_FOUND) {
+		pr_warn("SRAT table not found\n");
+		return;
+	} else if (ACPI_FAILURE(status)) {
+		const char *err = acpi_format_exception(status);
+		pr_err("SRAT table error: %s\n", err);
+		return;
+	}
+
+	table_end = (unsigned long)table_header + table_header->length;
+
+	/* Parse all entries looking for a match. */
+
+	sub_header = (struct acpi_subtable_header *)
+			((unsigned long)table_header +
+			sizeof(struct acpi_table_srat));
+	subtable_len = sub_header->length;
+
+	while (((unsigned long)sub_header) + subtable_len  < table_end) {
+		/*
+		 * If length is 0, break from this loop to avoid
+		 * infinite loop.
+		 */
+		if (subtable_len == 0) {
+			pr_err("SRAT invalid zero length\n");
+			break;
+		}
+
+		switch (sub_header->type) {
+		case ACPI_SRAT_TYPE_CPU_AFFINITY:
+			cpu = (struct acpi_srat_cpu_affinity *)sub_header;
+			pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
+					cpu->proximity_domain_lo;
+			if (pxm > max_pxm)
+				max_pxm = pxm;
+			break;
+		case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
+			gpu = (struct acpi_srat_generic_affinity *)sub_header;
+			bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
+					*((u16 *)(&gpu->device_handle[2]));
+			if (bdf == pci_id) {
+				found = true;
+				*numa_node = pxm_to_node(gpu->proximity_domain);
+			}
+			break;
+		default:
+			break;
+		}
+
+		if (found)
+			break;
+
+		sub_header = (struct acpi_subtable_header *)
+				((unsigned long)sub_header + subtable_len);
+		subtable_len = sub_header->length;
+	}
+
+	/* workaround bad cpu-gpu binding case */
+	if (found && (*numa_node < 0 || *numa_node > max_pxm))
+		*numa_node = 0;
+}
+#endif
+
 /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
  * to its NUMA node
  *	@avail_size: Available size in the memory
@@ -1774,6 +1855,9 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
 			uint32_t proximity_domain)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kdev->kgd;
+#ifdef CONFIG_NUMA
+	int numa_node = 0;
+#endif
 
 	*avail_size -= sizeof(struct crat_subtype_iolink);
 	if (*avail_size < 0)
@@ -1805,9 +1889,13 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
 
 	sub_type_hdr->proximity_domain_from = proximity_domain;
 #ifdef CONFIG_NUMA
-	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
-		sub_type_hdr->proximity_domain_to = 0;
-	else
+	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) {
+#ifdef CONFIG_ACPI
+		kfd_find_numa_node_in_srat(kdev, &numa_node);
+#endif
+		sub_type_hdr->proximity_domain_to = numa_node;
+		set_dev_node(&kdev->pdev->dev, numa_node);
+	} else
 		sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node;
 #else
 	sub_type_hdr->proximity_domain_to = 0;
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology
  2021-04-28 15:11 [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology Eric Huang
@ 2021-04-30 14:06 ` Eric Huang
  2021-04-30 23:42 ` Felix Kuehling
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 18+ messages in thread
From: Eric Huang @ 2021-04-30 14:06 UTC (permalink / raw)
  To: amd-gfx

ping...

On 2021-04-28 11:11 a.m., Eric Huang wrote:
> In NPS4 BIOS we need to find the closest numa node when creating
> topology io link between cpu and gpu, if PCI driver doesn't set
> it.
>
> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 94 ++++++++++++++++++++++++++-
>   1 file changed, 91 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> index 38d45711675f..57518136c7d7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> @@ -1759,6 +1759,87 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
>   	return 0;
>   }
>   
> +#ifdef CONFIG_ACPI
> +static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev,
> +		int *numa_node)
> +{
> +	struct acpi_table_header *table_header = NULL;
> +	struct acpi_subtable_header *sub_header = NULL;
> +	unsigned long table_end, subtable_len;
> +	u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
> +			pci_dev_id(kdev->pdev);
> +	u32 bdf;
> +	acpi_status status;
> +	struct acpi_srat_cpu_affinity *cpu;
> +	struct acpi_srat_generic_affinity *gpu;
> +	int pxm = 0, max_pxm = 0;
> +	bool found = false;
> +
> +	/* Fetch the SRAT table from ACPI */
> +	status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
> +	if (status == AE_NOT_FOUND) {
> +		pr_warn("SRAT table not found\n");
> +		return;
> +	} else if (ACPI_FAILURE(status)) {
> +		const char *err = acpi_format_exception(status);
> +		pr_err("SRAT table error: %s\n", err);
> +		return;
> +	}
> +
> +	table_end = (unsigned long)table_header + table_header->length;
> +
> +	/* Parse all entries looking for a match. */
> +
> +	sub_header = (struct acpi_subtable_header *)
> +			((unsigned long)table_header +
> +			sizeof(struct acpi_table_srat));
> +	subtable_len = sub_header->length;
> +
> +	while (((unsigned long)sub_header) + subtable_len  < table_end) {
> +		/*
> +		 * If length is 0, break from this loop to avoid
> +		 * infinite loop.
> +		 */
> +		if (subtable_len == 0) {
> +			pr_err("SRAT invalid zero length\n");
> +			break;
> +		}
> +
> +		switch (sub_header->type) {
> +		case ACPI_SRAT_TYPE_CPU_AFFINITY:
> +			cpu = (struct acpi_srat_cpu_affinity *)sub_header;
> +			pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
> +					cpu->proximity_domain_lo;
> +			if (pxm > max_pxm)
> +				max_pxm = pxm;
> +			break;
> +		case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
> +			gpu = (struct acpi_srat_generic_affinity *)sub_header;
> +			bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
> +					*((u16 *)(&gpu->device_handle[2]));
> +			if (bdf == pci_id) {
> +				found = true;
> +				*numa_node = pxm_to_node(gpu->proximity_domain);
> +			}
> +			break;
> +		default:
> +			break;
> +		}
> +
> +		if (found)
> +			break;
> +
> +		sub_header = (struct acpi_subtable_header *)
> +				((unsigned long)sub_header + subtable_len);
> +		subtable_len = sub_header->length;
> +	}
> +
> +	/* workaround bad cpu-gpu binding case */
> +	if (found && (*numa_node < 0 || *numa_node > max_pxm))
> +		*numa_node = 0;
> +}
> +#endif
> +
>   /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
>    * to its NUMA node
>    *	@avail_size: Available size in the memory
> @@ -1774,6 +1855,9 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
>   			uint32_t proximity_domain)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)kdev->kgd;
> +#ifdef CONFIG_NUMA
> +	int numa_node = 0;
> +#endif
>   
>   	*avail_size -= sizeof(struct crat_subtype_iolink);
>   	if (*avail_size < 0)
> @@ -1805,9 +1889,13 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
>   
>   	sub_type_hdr->proximity_domain_from = proximity_domain;
>   #ifdef CONFIG_NUMA
> -	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
> -		sub_type_hdr->proximity_domain_to = 0;
> -	else
> +	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) {
> +#ifdef CONFIG_ACPI
> +		kfd_find_numa_node_in_srat(kdev, &numa_node);
> +#endif
> +		sub_type_hdr->proximity_domain_to = numa_node;
> +		set_dev_node(&kdev->pdev->dev, numa_node);
> +	} else
>   		sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node;
>   #else
>   	sub_type_hdr->proximity_domain_to = 0;
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology
  2021-04-28 15:11 [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology Eric Huang
  2021-04-30 14:06 ` Eric Huang
@ 2021-04-30 23:42 ` Felix Kuehling
  2021-05-03 13:52   ` Eric Huang
  2021-05-03 18:43 ` Zeng, Oak
  2021-05-04  7:46 ` Lazar, Lijo
  3 siblings, 1 reply; 18+ messages in thread
From: Felix Kuehling @ 2021-04-30 23:42 UTC (permalink / raw)
  To: amd-gfx, Huang, JinHuiEric

Am 2021-04-28 um 11:11 a.m. schrieb Eric Huang:
> In NPS4 BIOS we need to find the closest numa node when creating
> topology io link between cpu and gpu, if PCI driver doesn't set
> it.
>
> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 94 ++++++++++++++++++++++++++-
>  1 file changed, 91 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> index 38d45711675f..57518136c7d7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> @@ -1759,6 +1759,87 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
>  	return 0;
>  }
>  
> +#ifdef CONFIG_ACPI
> +static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev,
> +		int *numa_node)
> +{
> +	struct acpi_table_header *table_header = NULL;
> +	struct acpi_subtable_header *sub_header = NULL;
> +	unsigned long table_end, subtable_len;
> +	u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
> +			pci_dev_id(kdev->pdev);
> +	u32 bdf;
> +	acpi_status status;
> +	struct acpi_srat_cpu_affinity *cpu;
> +	struct acpi_srat_generic_affinity *gpu;
> +	int pxm = 0, max_pxm = 0;
> +	bool found = false;
> +
> +	/* Fetch the SRAT table from ACPI */
> +	status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
> +	if (status == AE_NOT_FOUND) {
> +		pr_warn("SRAT table not found\n");
> +		return;
> +	} else if (ACPI_FAILURE(status)) {
> +		const char *err = acpi_format_exception(status);
> +		pr_err("SRAT table error: %s\n", err);
> +		return;
> +	}
After a successful call to acpi_get_table you need to call
acpi_put_table before this function returns to avoid leaking memory.


> +
> +	table_end = (unsigned long)table_header + table_header->length;
> +
> +	/* Parse all entries looking for a match. */
> +
> +	sub_header = (struct acpi_subtable_header *)
> +			((unsigned long)table_header +
> +			sizeof(struct acpi_table_srat));
> +	subtable_len = sub_header->length;
> +
> +	while (((unsigned long)sub_header) + subtable_len  < table_end) {
> +		/*
> +		 * If length is 0, break from this loop to avoid
> +		 * infinite loop.
> +		 */
> +		if (subtable_len == 0) {
> +			pr_err("SRAT invalid zero length\n");
> +			break;
> +		}
> +
> +		switch (sub_header->type) {
> +		case ACPI_SRAT_TYPE_CPU_AFFINITY:
> +			cpu = (struct acpi_srat_cpu_affinity *)sub_header;
> +			pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
> +					cpu->proximity_domain_lo;
> +			if (pxm > max_pxm)
> +				max_pxm = pxm;
> +			break;
> +		case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
> +			gpu = (struct acpi_srat_generic_affinity *)sub_header;
> +			bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
> +					*((u16 *)(&gpu->device_handle[2]));
> +			if (bdf == pci_id) {
> +				found = true;
> +				*numa_node = pxm_to_node(gpu->proximity_domain);
> +			}
> +			break;
> +		default:
> +			break;
> +		}
> +
> +		if (found)
> +			break;
> +
> +		sub_header = (struct acpi_subtable_header *)
> +				((unsigned long)sub_header + subtable_len);
> +		subtable_len = sub_header->length;
> +	}
> +
> +	/* workaround bad cpu-gpu binding case */
> +	if (found && (*numa_node < 0 || *numa_node > max_pxm))
> +		*numa_node = 0;

A suggestion: If you find a sensible NUMA node, call set_dev_node here.
That simplifies the caller. See below


> +}
> +#endif
> +
>  /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
>   * to its NUMA node
>   *	@avail_size: Available size in the memory
> @@ -1774,6 +1855,9 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
>  			uint32_t proximity_domain)
>  {
>  	struct amdgpu_device *adev = (struct amdgpu_device *)kdev->kgd;
> +#ifdef CONFIG_NUMA
> +	int numa_node = 0;

Should this be NUMA_NO_NODE?


> +#endif
>  
>  	*avail_size -= sizeof(struct crat_subtype_iolink);
>  	if (*avail_size < 0)
> @@ -1805,9 +1889,13 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
>  
>  	sub_type_hdr->proximity_domain_from = proximity_domain;
>  #ifdef CONFIG_NUMA
> -	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
> -		sub_type_hdr->proximity_domain_to = 0;
> -	else
> +	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) {
> +#ifdef CONFIG_ACPI
> +		kfd_find_numa_node_in_srat(kdev, &numa_node);
> +#endif
> +		sub_type_hdr->proximity_domain_to = numa_node;
> +		set_dev_node(&kdev->pdev->dev, numa_node);
> +	} else
>  		sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node;

It's better style to use braces on all if/else branches, if one branch
needs them.

But with my suggestion above this would become simpler:

+#ifdef CONFIG_ACPI
+	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
+		kfd_find_numa_node_in_srat(kdev);
+#endif
	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
		sub_type_hdr->proximity_domain_to = 0;
	else
		sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node;

Regards,
  Felix


>  #else
>  	sub_type_hdr->proximity_domain_to = 0;
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology
  2021-04-30 23:42 ` Felix Kuehling
@ 2021-05-03 13:52   ` Eric Huang
  0 siblings, 0 replies; 18+ messages in thread
From: Eric Huang @ 2021-05-03 13:52 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx

Thanks Felix for your review. I will send another patch.

Eric

On 2021-04-30 7:42 p.m., Felix Kuehling wrote:
> Am 2021-04-28 um 11:11 a.m. schrieb Eric Huang:
>> In NPS4 BIOS we need to find the closest numa node when creating
>> topology io link between cpu and gpu, if PCI driver doesn't set
>> it.
>>
>> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 94 ++++++++++++++++++++++++++-
>>   1 file changed, 91 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>> index 38d45711675f..57518136c7d7 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>> @@ -1759,6 +1759,87 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
>>   	return 0;
>>   }
>>   
>> +#ifdef CONFIG_ACPI
>> +static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev,
>> +		int *numa_node)
>> +{
>> +	struct acpi_table_header *table_header = NULL;
>> +	struct acpi_subtable_header *sub_header = NULL;
>> +	unsigned long table_end, subtable_len;
>> +	u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
>> +			pci_dev_id(kdev->pdev);
>> +	u32 bdf;
>> +	acpi_status status;
>> +	struct acpi_srat_cpu_affinity *cpu;
>> +	struct acpi_srat_generic_affinity *gpu;
>> +	int pxm = 0, max_pxm = 0;
>> +	bool found = false;
>> +
>> +	/* Fetch the SRAT table from ACPI */
>> +	status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
>> +	if (status == AE_NOT_FOUND) {
>> +		pr_warn("SRAT table not found\n");
>> +		return;
>> +	} else if (ACPI_FAILURE(status)) {
>> +		const char *err = acpi_format_exception(status);
>> +		pr_err("SRAT table error: %s\n", err);
>> +		return;
>> +	}
> After a successful call to acpi_get_table you need to call
> acpi_put_table before this function returns to avoid leaking memory.
>
>
>> +
>> +	table_end = (unsigned long)table_header + table_header->length;
>> +
>> +	/* Parse all entries looking for a match. */
>> +
>> +	sub_header = (struct acpi_subtable_header *)
>> +			((unsigned long)table_header +
>> +			sizeof(struct acpi_table_srat));
>> +	subtable_len = sub_header->length;
>> +
>> +	while (((unsigned long)sub_header) + subtable_len  < table_end) {
>> +		/*
>> +		 * If length is 0, break from this loop to avoid
>> +		 * infinite loop.
>> +		 */
>> +		if (subtable_len == 0) {
>> +			pr_err("SRAT invalid zero length\n");
>> +			break;
>> +		}
>> +
>> +		switch (sub_header->type) {
>> +		case ACPI_SRAT_TYPE_CPU_AFFINITY:
>> +			cpu = (struct acpi_srat_cpu_affinity *)sub_header;
>> +			pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
>> +					cpu->proximity_domain_lo;
>> +			if (pxm > max_pxm)
>> +				max_pxm = pxm;
>> +			break;
>> +		case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
>> +			gpu = (struct acpi_srat_generic_affinity *)sub_header;
>> +			bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
>> +					*((u16 *)(&gpu->device_handle[2]));
>> +			if (bdf == pci_id) {
>> +				found = true;
>> +				*numa_node = pxm_to_node(gpu->proximity_domain);
>> +			}
>> +			break;
>> +		default:
>> +			break;
>> +		}
>> +
>> +		if (found)
>> +			break;
>> +
>> +		sub_header = (struct acpi_subtable_header *)
>> +				((unsigned long)sub_header + subtable_len);
>> +		subtable_len = sub_header->length;
>> +	}
>> +
>> +	/* workaround bad cpu-gpu binding case */
>> +	if (found && (*numa_node < 0 || *numa_node > max_pxm))
>> +		*numa_node = 0;
> A suggestion: If you find a sensible NUMA node, call set_dev_node here.
> That simplifies the caller. See below
>
>
>> +}
>> +#endif
>> +
>>   /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
>>    * to its NUMA node
>>    *	@avail_size: Available size in the memory
>> @@ -1774,6 +1855,9 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
>>   			uint32_t proximity_domain)
>>   {
>>   	struct amdgpu_device *adev = (struct amdgpu_device *)kdev->kgd;
>> +#ifdef CONFIG_NUMA
>> +	int numa_node = 0;
> Should this be NUMA_NO_NODE?
>
>
>> +#endif
>>   
>>   	*avail_size -= sizeof(struct crat_subtype_iolink);
>>   	if (*avail_size < 0)
>> @@ -1805,9 +1889,13 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
>>   
>>   	sub_type_hdr->proximity_domain_from = proximity_domain;
>>   #ifdef CONFIG_NUMA
>> -	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
>> -		sub_type_hdr->proximity_domain_to = 0;
>> -	else
>> +	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) {
>> +#ifdef CONFIG_ACPI
>> +		kfd_find_numa_node_in_srat(kdev, &numa_node);
>> +#endif
>> +		sub_type_hdr->proximity_domain_to = numa_node;
>> +		set_dev_node(&kdev->pdev->dev, numa_node);
>> +	} else
>>   		sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node;
> It's better style to use braces on all if/else branches, if one branch
> needs them.
>
> But with my suggestion above this would become simpler:
>
> +#ifdef CONFIG_ACPI
> +	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
> +		kfd_find_numa_node_in_srat(kdev);
> +#endif
> 	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
> 		sub_type_hdr->proximity_domain_to = 0;
> 	else
> 		sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node;
>
> Regards,
>    Felix
>
>
>>   #else
>>   	sub_type_hdr->proximity_domain_to = 0;

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology
  2021-04-28 15:11 [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology Eric Huang
  2021-04-30 14:06 ` Eric Huang
  2021-04-30 23:42 ` Felix Kuehling
@ 2021-05-03 18:43 ` Zeng, Oak
  2021-05-03 19:13   ` Eric Huang
  2021-05-04  7:46 ` Lazar, Lijo
  3 siblings, 1 reply; 18+ messages in thread
From: Zeng, Oak @ 2021-05-03 18:43 UTC (permalink / raw)
  To: Huang, JinHuiEric, amd-gfx; +Cc: Kuehling, Felix

I feel such parsing work should be part of the ACPI generic work so should be done in drivers/acpi/num/srat.c (see acpi_table_parse_srat) and the acpi subsystem should expose APIs for rest drivers to query such numa information.

Regards,
Oak 

 

On 2021-04-28, 11:12 AM, "amd-gfx on behalf of Eric Huang" <amd-gfx-bounces@lists.freedesktop.org on behalf of JinHuiEric.Huang@amd.com> wrote:

    In NPS4 BIOS we need to find the closest numa node when creating
    topology io link between cpu and gpu, if PCI driver doesn't set
    it.

    Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
    ---
     drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 94 ++++++++++++++++++++++++++-
     1 file changed, 91 insertions(+), 3 deletions(-)

    diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
    index 38d45711675f..57518136c7d7 100644
    --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
    +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
    @@ -1759,6 +1759,87 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
     	return 0;
     }

    +#ifdef CONFIG_ACPI
    +static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev,
    +		int *numa_node)
    +{
    +	struct acpi_table_header *table_header = NULL;
    +	struct acpi_subtable_header *sub_header = NULL;
    +	unsigned long table_end, subtable_len;
    +	u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
    +			pci_dev_id(kdev->pdev);
    +	u32 bdf;
    +	acpi_status status;
    +	struct acpi_srat_cpu_affinity *cpu;
    +	struct acpi_srat_generic_affinity *gpu;
    +	int pxm = 0, max_pxm = 0;
    +	bool found = false;
    +
    +	/* Fetch the SRAT table from ACPI */
    +	status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
    +	if (status == AE_NOT_FOUND) {
    +		pr_warn("SRAT table not found\n");
    +		return;
    +	} else if (ACPI_FAILURE(status)) {
    +		const char *err = acpi_format_exception(status);
    +		pr_err("SRAT table error: %s\n", err);
    +		return;
    +	}
    +
    +	table_end = (unsigned long)table_header + table_header->length;
    +
    +	/* Parse all entries looking for a match. */
    +
    +	sub_header = (struct acpi_subtable_header *)
    +			((unsigned long)table_header +
    +			sizeof(struct acpi_table_srat));
    +	subtable_len = sub_header->length;
    +
    +	while (((unsigned long)sub_header) + subtable_len  < table_end) {
    +		/*
    +		 * If length is 0, break from this loop to avoid
    +		 * infinite loop.
    +		 */
    +		if (subtable_len == 0) {
    +			pr_err("SRAT invalid zero length\n");
    +			break;
    +		}
    +
    +		switch (sub_header->type) {
    +		case ACPI_SRAT_TYPE_CPU_AFFINITY:
    +			cpu = (struct acpi_srat_cpu_affinity *)sub_header;
    +			pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
    +					cpu->proximity_domain_lo;
    +			if (pxm > max_pxm)
    +				max_pxm = pxm;
    +			break;
    +		case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
    +			gpu = (struct acpi_srat_generic_affinity *)sub_header;
    +			bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
    +					*((u16 *)(&gpu->device_handle[2]));
    +			if (bdf == pci_id) {
    +				found = true;
    +				*numa_node = pxm_to_node(gpu->proximity_domain);
    +			}
    +			break;
    +		default:
    +			break;
    +		}
    +
    +		if (found)
    +			break;
    +
    +		sub_header = (struct acpi_subtable_header *)
    +				((unsigned long)sub_header + subtable_len);
    +		subtable_len = sub_header->length;
    +	}
    +
    +	/* workaround bad cpu-gpu binding case */
    +	if (found && (*numa_node < 0 || *numa_node > max_pxm))
    +		*numa_node = 0;
    +}
    +#endif
    +
     /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
      * to its NUMA node
      *	@avail_size: Available size in the memory
    @@ -1774,6 +1855,9 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
     			uint32_t proximity_domain)
     {
     	struct amdgpu_device *adev = (struct amdgpu_device *)kdev->kgd;
    +#ifdef CONFIG_NUMA
    +	int numa_node = 0;
    +#endif

     	*avail_size -= sizeof(struct crat_subtype_iolink);
     	if (*avail_size < 0)
    @@ -1805,9 +1889,13 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,

     	sub_type_hdr->proximity_domain_from = proximity_domain;
     #ifdef CONFIG_NUMA
    -	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
    -		sub_type_hdr->proximity_domain_to = 0;
    -	else
    +	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) {
    +#ifdef CONFIG_ACPI
    +		kfd_find_numa_node_in_srat(kdev, &numa_node);
    +#endif
    +		sub_type_hdr->proximity_domain_to = numa_node;
    +		set_dev_node(&kdev->pdev->dev, numa_node);
    +	} else
     		sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node;
     #else
     	sub_type_hdr->proximity_domain_to = 0;
    -- 
    2.17.1

    _______________________________________________
    amd-gfx mailing list
    amd-gfx@lists.freedesktop.org
    https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=04%7C01%7Coak.zeng%40amd.com%7C96808a6aab7b40861eeb08d90a580524%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637552195437139248%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=vx9wqAehK7zUJy2mVAp9KOV3u4ZNvE%2BmDGfGGK%2F8chU%3D&amp;reserved=0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology
  2021-05-03 18:43 ` Zeng, Oak
@ 2021-05-03 19:13   ` Eric Huang
  2021-05-04  2:17     ` Zeng, Oak
  0 siblings, 1 reply; 18+ messages in thread
From: Eric Huang @ 2021-05-03 19:13 UTC (permalink / raw)
  To: Zeng, Oak, amd-gfx; +Cc: Kuehling, Felix

In drivers/acpi/numa/srat.c, the generic CCD parsing is for the mapping 
of numa node and pxm domain that creates arrays of pxm_to_node_map and 
node_to_pxm_map. We are currently using API pxm_to_node() to get the 
corresponding information.

For GCD parsing, the relation of GCD to CCD is defined by AMD, generic 
parsing in srat.c is considering a GCD as a new numa node which is not 
suitable for our need.

Regards,
Eric

On 2021-05-03 2:43 p.m., Zeng, Oak wrote:
> I feel such parsing work should be part of the ACPI generic work so should be done in drivers/acpi/num/srat.c (see acpi_table_parse_srat) and the acpi subsystem should expose APIs for rest drivers to query such numa information.
>
> Regards,
> Oak
>
>   
>
> On 2021-04-28, 11:12 AM, "amd-gfx on behalf of Eric Huang" <amd-gfx-bounces@lists.freedesktop.org on behalf of JinHuiEric.Huang@amd.com> wrote:
>
>      In NPS4 BIOS we need to find the closest numa node when creating
>      topology io link between cpu and gpu, if PCI driver doesn't set
>      it.
>
>      Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
>      ---
>       drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 94 ++++++++++++++++++++++++++-
>       1 file changed, 91 insertions(+), 3 deletions(-)
>
>      diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>      index 38d45711675f..57518136c7d7 100644
>      --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>      +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>      @@ -1759,6 +1759,87 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
>       	return 0;
>       }
>
>      +#ifdef CONFIG_ACPI
>      +static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev,
>      +		int *numa_node)
>      +{
>      +	struct acpi_table_header *table_header = NULL;
>      +	struct acpi_subtable_header *sub_header = NULL;
>      +	unsigned long table_end, subtable_len;
>      +	u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
>      +			pci_dev_id(kdev->pdev);
>      +	u32 bdf;
>      +	acpi_status status;
>      +	struct acpi_srat_cpu_affinity *cpu;
>      +	struct acpi_srat_generic_affinity *gpu;
>      +	int pxm = 0, max_pxm = 0;
>      +	bool found = false;
>      +
>      +	/* Fetch the SRAT table from ACPI */
>      +	status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
>      +	if (status == AE_NOT_FOUND) {
>      +		pr_warn("SRAT table not found\n");
>      +		return;
>      +	} else if (ACPI_FAILURE(status)) {
>      +		const char *err = acpi_format_exception(status);
>      +		pr_err("SRAT table error: %s\n", err);
>      +		return;
>      +	}
>      +
>      +	table_end = (unsigned long)table_header + table_header->length;
>      +
>      +	/* Parse all entries looking for a match. */
>      +
>      +	sub_header = (struct acpi_subtable_header *)
>      +			((unsigned long)table_header +
>      +			sizeof(struct acpi_table_srat));
>      +	subtable_len = sub_header->length;
>      +
>      +	while (((unsigned long)sub_header) + subtable_len  < table_end) {
>      +		/*
>      +		 * If length is 0, break from this loop to avoid
>      +		 * infinite loop.
>      +		 */
>      +		if (subtable_len == 0) {
>      +			pr_err("SRAT invalid zero length\n");
>      +			break;
>      +		}
>      +
>      +		switch (sub_header->type) {
>      +		case ACPI_SRAT_TYPE_CPU_AFFINITY:
>      +			cpu = (struct acpi_srat_cpu_affinity *)sub_header;
>      +			pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
>      +					cpu->proximity_domain_lo;
>      +			if (pxm > max_pxm)
>      +				max_pxm = pxm;
>      +			break;
>      +		case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
>      +			gpu = (struct acpi_srat_generic_affinity *)sub_header;
>      +			bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
>      +					*((u16 *)(&gpu->device_handle[2]));
>      +			if (bdf == pci_id) {
>      +				found = true;
>      +				*numa_node = pxm_to_node(gpu->proximity_domain);
>      +			}
>      +			break;
>      +		default:
>      +			break;
>      +		}
>      +
>      +		if (found)
>      +			break;
>      +
>      +		sub_header = (struct acpi_subtable_header *)
>      +				((unsigned long)sub_header + subtable_len);
>      +		subtable_len = sub_header->length;
>      +	}
>      +
>      +	/* workaround bad cpu-gpu binding case */
>      +	if (found && (*numa_node < 0 || *numa_node > max_pxm))
>      +		*numa_node = 0;
>      +}
>      +#endif
>      +
>       /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
>        * to its NUMA node
>        *	@avail_size: Available size in the memory
>      @@ -1774,6 +1855,9 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
>       			uint32_t proximity_domain)
>       {
>       	struct amdgpu_device *adev = (struct amdgpu_device *)kdev->kgd;
>      +#ifdef CONFIG_NUMA
>      +	int numa_node = 0;
>      +#endif
>
>       	*avail_size -= sizeof(struct crat_subtype_iolink);
>       	if (*avail_size < 0)
>      @@ -1805,9 +1889,13 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
>
>       	sub_type_hdr->proximity_domain_from = proximity_domain;
>       #ifdef CONFIG_NUMA
>      -	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
>      -		sub_type_hdr->proximity_domain_to = 0;
>      -	else
>      +	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) {
>      +#ifdef CONFIG_ACPI
>      +		kfd_find_numa_node_in_srat(kdev, &numa_node);
>      +#endif
>      +		sub_type_hdr->proximity_domain_to = numa_node;
>      +		set_dev_node(&kdev->pdev->dev, numa_node);
>      +	} else
>       		sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node;
>       #else
>       	sub_type_hdr->proximity_domain_to = 0;
>      --
>      2.17.1
>
>      _______________________________________________
>      amd-gfx mailing list
>      amd-gfx@lists.freedesktop.org
>      https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=04%7C01%7Coak.zeng%40amd.com%7C96808a6aab7b40861eeb08d90a580524%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637552195437139248%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=vx9wqAehK7zUJy2mVAp9KOV3u4ZNvE%2BmDGfGGK%2F8chU%3D&amp;reserved=0
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology
  2021-05-03 19:13   ` Eric Huang
@ 2021-05-04  2:17     ` Zeng, Oak
  0 siblings, 0 replies; 18+ messages in thread
From: Zeng, Oak @ 2021-05-04  2:17 UTC (permalink / raw)
  To: Huang, JinHuiEric, amd-gfx; +Cc: Kuehling, Felix

Ok, that makes sense. Thanks for explaining.

Regards,
Oak 

 

On 2021-05-03, 3:13 PM, "Huang, JinHuiEric" <JinHuiEric.Huang@amd.com> wrote:

    In drivers/acpi/numa/srat.c, the generic CCD parsing is for the mapping 
    of numa node and pxm domain that creates arrays of pxm_to_node_map and 
    node_to_pxm_map. We are currently using API pxm_to_node() to get the 
    corresponding information.

    For GCD parsing, the relation of GCD to CCD is defined by AMD, generic 
    parsing in srat.c is considering a GCD as a new numa node which is not 
    suitable for our need.

    Regards,
    Eric

    On 2021-05-03 2:43 p.m., Zeng, Oak wrote:
    > I feel such parsing work should be part of the ACPI generic work so should be done in drivers/acpi/num/srat.c (see acpi_table_parse_srat) and the acpi subsystem should expose APIs for rest drivers to query such numa information.
    >
    > Regards,
    > Oak
    >
    >   
    >
    > On 2021-04-28, 11:12 AM, "amd-gfx on behalf of Eric Huang" <amd-gfx-bounces@lists.freedesktop.org on behalf of JinHuiEric.Huang@amd.com> wrote:
    >
    >      In NPS4 BIOS we need to find the closest numa node when creating
    >      topology io link between cpu and gpu, if PCI driver doesn't set
    >      it.
    >
    >      Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
    >      ---
    >       drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 94 ++++++++++++++++++++++++++-
    >       1 file changed, 91 insertions(+), 3 deletions(-)
    >
    >      diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
    >      index 38d45711675f..57518136c7d7 100644
    >      --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
    >      +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
    >      @@ -1759,6 +1759,87 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
    >       	return 0;
    >       }
    >
    >      +#ifdef CONFIG_ACPI
    >      +static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev,
    >      +		int *numa_node)
    >      +{
    >      +	struct acpi_table_header *table_header = NULL;
    >      +	struct acpi_subtable_header *sub_header = NULL;
    >      +	unsigned long table_end, subtable_len;
    >      +	u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
    >      +			pci_dev_id(kdev->pdev);
    >      +	u32 bdf;
    >      +	acpi_status status;
    >      +	struct acpi_srat_cpu_affinity *cpu;
    >      +	struct acpi_srat_generic_affinity *gpu;
    >      +	int pxm = 0, max_pxm = 0;
    >      +	bool found = false;
    >      +
    >      +	/* Fetch the SRAT table from ACPI */
    >      +	status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
    >      +	if (status == AE_NOT_FOUND) {
    >      +		pr_warn("SRAT table not found\n");
    >      +		return;
    >      +	} else if (ACPI_FAILURE(status)) {
    >      +		const char *err = acpi_format_exception(status);
    >      +		pr_err("SRAT table error: %s\n", err);
    >      +		return;
    >      +	}
    >      +
    >      +	table_end = (unsigned long)table_header + table_header->length;
    >      +
    >      +	/* Parse all entries looking for a match. */
    >      +
    >      +	sub_header = (struct acpi_subtable_header *)
    >      +			((unsigned long)table_header +
    >      +			sizeof(struct acpi_table_srat));
    >      +	subtable_len = sub_header->length;
    >      +
    >      +	while (((unsigned long)sub_header) + subtable_len  < table_end) {
    >      +		/*
    >      +		 * If length is 0, break from this loop to avoid
    >      +		 * infinite loop.
    >      +		 */
    >      +		if (subtable_len == 0) {
    >      +			pr_err("SRAT invalid zero length\n");
    >      +			break;
    >      +		}
    >      +
    >      +		switch (sub_header->type) {
    >      +		case ACPI_SRAT_TYPE_CPU_AFFINITY:
    >      +			cpu = (struct acpi_srat_cpu_affinity *)sub_header;
    >      +			pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
    >      +					cpu->proximity_domain_lo;
    >      +			if (pxm > max_pxm)
    >      +				max_pxm = pxm;
    >      +			break;
    >      +		case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
    >      +			gpu = (struct acpi_srat_generic_affinity *)sub_header;
    >      +			bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
    >      +					*((u16 *)(&gpu->device_handle[2]));
    >      +			if (bdf == pci_id) {
    >      +				found = true;
    >      +				*numa_node = pxm_to_node(gpu->proximity_domain);
    >      +			}
    >      +			break;
    >      +		default:
    >      +			break;
    >      +		}
    >      +
    >      +		if (found)
    >      +			break;
    >      +
    >      +		sub_header = (struct acpi_subtable_header *)
    >      +				((unsigned long)sub_header + subtable_len);
    >      +		subtable_len = sub_header->length;
    >      +	}
    >      +
    >      +	/* workaround bad cpu-gpu binding case */
    >      +	if (found && (*numa_node < 0 || *numa_node > max_pxm))
    >      +		*numa_node = 0;
    >      +}
    >      +#endif
    >      +
    >       /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
    >        * to its NUMA node
    >        *	@avail_size: Available size in the memory
    >      @@ -1774,6 +1855,9 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
    >       			uint32_t proximity_domain)
    >       {
    >       	struct amdgpu_device *adev = (struct amdgpu_device *)kdev->kgd;
    >      +#ifdef CONFIG_NUMA
    >      +	int numa_node = 0;
    >      +#endif
    >
    >       	*avail_size -= sizeof(struct crat_subtype_iolink);
    >       	if (*avail_size < 0)
    >      @@ -1805,9 +1889,13 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
    >
    >       	sub_type_hdr->proximity_domain_from = proximity_domain;
    >       #ifdef CONFIG_NUMA
    >      -	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
    >      -		sub_type_hdr->proximity_domain_to = 0;
    >      -	else
    >      +	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) {
    >      +#ifdef CONFIG_ACPI
    >      +		kfd_find_numa_node_in_srat(kdev, &numa_node);
    >      +#endif
    >      +		sub_type_hdr->proximity_domain_to = numa_node;
    >      +		set_dev_node(&kdev->pdev->dev, numa_node);
    >      +	} else
    >       		sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node;
    >       #else
    >       	sub_type_hdr->proximity_domain_to = 0;
    >      --
    >      2.17.1
    >
    >      _______________________________________________
    >      amd-gfx mailing list
    >      amd-gfx@lists.freedesktop.org
    >      https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=04%7C01%7Coak.zeng%40amd.com%7C96808a6aab7b40861eeb08d90a580524%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637552195437139248%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=vx9wqAehK7zUJy2mVAp9KOV3u4ZNvE%2BmDGfGGK%2F8chU%3D&amp;reserved=0
    >


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* RE: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology
  2021-04-28 15:11 [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology Eric Huang
                   ` (2 preceding siblings ...)
  2021-05-03 18:43 ` Zeng, Oak
@ 2021-05-04  7:46 ` Lazar, Lijo
  2021-05-04 14:00   ` Eric Huang
  3 siblings, 1 reply; 18+ messages in thread
From: Lazar, Lijo @ 2021-05-04  7:46 UTC (permalink / raw)
  To: Huang, JinHuiEric, amd-gfx; +Cc: Huang, JinHuiEric

[AMD Public Use]

> *numa_node > max_pxm

Why numa node number is compared to a proximity domain? Since you are already using pxm_to_node() API, assume that should take care.

That also will avoid parsing ACPI_SRAT_TYPE_CPU_AFFINITY structs.

Thanks,
Lijo


-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Eric Huang
Sent: Wednesday, April 28, 2021 8:42 PM
To: amd-gfx@lists.freedesktop.org
Cc: Huang, JinHuiEric <JinHuiEric.Huang@amd.com>
Subject: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology

In NPS4 BIOS we need to find the closest numa node when creating topology io link between cpu and gpu, if PCI driver doesn't set it.

Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 94 ++++++++++++++++++++++++++-
 1 file changed, 91 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 38d45711675f..57518136c7d7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1759,6 +1759,87 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
 	return 0;
 }
 
+#ifdef CONFIG_ACPI
+static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev,
+		int *numa_node)
+{
+	struct acpi_table_header *table_header = NULL;
+	struct acpi_subtable_header *sub_header = NULL;
+	unsigned long table_end, subtable_len;
+	u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
+			pci_dev_id(kdev->pdev);
+	u32 bdf;
+	acpi_status status;
+	struct acpi_srat_cpu_affinity *cpu;
+	struct acpi_srat_generic_affinity *gpu;
+	int pxm = 0, max_pxm = 0;
+	bool found = false;
+
+	/* Fetch the SRAT table from ACPI */
+	status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
+	if (status == AE_NOT_FOUND) {
+		pr_warn("SRAT table not found\n");
+		return;
+	} else if (ACPI_FAILURE(status)) {
+		const char *err = acpi_format_exception(status);
+		pr_err("SRAT table error: %s\n", err);
+		return;
+	}
+
+	table_end = (unsigned long)table_header + table_header->length;
+
+	/* Parse all entries looking for a match. */
+
+	sub_header = (struct acpi_subtable_header *)
+			((unsigned long)table_header +
+			sizeof(struct acpi_table_srat));
+	subtable_len = sub_header->length;
+
+	while (((unsigned long)sub_header) + subtable_len  < table_end) {
+		/*
+		 * If length is 0, break from this loop to avoid
+		 * infinite loop.
+		 */
+		if (subtable_len == 0) {
+			pr_err("SRAT invalid zero length\n");
+			break;
+		}
+
+		switch (sub_header->type) {
+		case ACPI_SRAT_TYPE_CPU_AFFINITY:
+			cpu = (struct acpi_srat_cpu_affinity *)sub_header;
+			pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
+					cpu->proximity_domain_lo;
+			if (pxm > max_pxm)
+				max_pxm = pxm;
+			break;
+		case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
+			gpu = (struct acpi_srat_generic_affinity *)sub_header;
+			bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
+					*((u16 *)(&gpu->device_handle[2]));
+			if (bdf == pci_id) {
+				found = true;
+				*numa_node = pxm_to_node(gpu->proximity_domain);
+			}
+			break;
+		default:
+			break;
+		}
+
+		if (found)
+			break;
+
+		sub_header = (struct acpi_subtable_header *)
+				((unsigned long)sub_header + subtable_len);
+		subtable_len = sub_header->length;
+	}
+
+	/* workaround bad cpu-gpu binding case */
+	if (found && (*numa_node < 0 || *numa_node > max_pxm))
+		*numa_node = 0;
+}
+#endif
+
 /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
  * to its NUMA node
  *	@avail_size: Available size in the memory
@@ -1774,6 +1855,9 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
 			uint32_t proximity_domain)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kdev->kgd;
+#ifdef CONFIG_NUMA
+	int numa_node = 0;
+#endif
 
 	*avail_size -= sizeof(struct crat_subtype_iolink);
 	if (*avail_size < 0)
@@ -1805,9 +1889,13 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
 
 	sub_type_hdr->proximity_domain_from = proximity_domain;  #ifdef CONFIG_NUMA
-	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
-		sub_type_hdr->proximity_domain_to = 0;
-	else
+	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) { #ifdef CONFIG_ACPI
+		kfd_find_numa_node_in_srat(kdev, &numa_node); #endif
+		sub_type_hdr->proximity_domain_to = numa_node;
+		set_dev_node(&kdev->pdev->dev, numa_node);
+	} else
 		sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node;  #else
 	sub_type_hdr->proximity_domain_to = 0;
--
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=04%7C01%7Clijo.lazar%40amd.com%7C96808a6aab7b40861eeb08d90a580524%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637552195438132467%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=ipBmGTX%2Fokto1zRuQ8jlDA8p%2B8BOjHZa5WGGKNJszEY%3D&amp;reserved=0
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology
  2021-05-04  7:46 ` Lazar, Lijo
@ 2021-05-04 14:00   ` Eric Huang
  2021-05-04 14:30     ` Lazar, Lijo
  0 siblings, 1 reply; 18+ messages in thread
From: Eric Huang @ 2021-05-04 14:00 UTC (permalink / raw)
  To: Lazar, Lijo, amd-gfx

Like I answer Oak's question,
"For GCD parsing, the relation of GCD to CCD is defined by AMD, generic 
parsing in srat.c is considering a GCD as a new numa node which is not 
suitable for our need."

GCD's pxm domain will get a wrong numa node which may be bigger than CCD 
domains, so I have to do a sanity check to correct it.

Regards,
Eric

On 2021-05-04 3:46 a.m., Lazar, Lijo wrote:
> [AMD Public Use]
>
>> *numa_node > max_pxm
> Why numa node number is compared to a proximity domain? Since you are already using pxm_to_node() API, assume that should take care.
>
> That also will avoid parsing ACPI_SRAT_TYPE_CPU_AFFINITY structs.
>
> Thanks,
> Lijo
>
>
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Eric Huang
> Sent: Wednesday, April 28, 2021 8:42 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Huang, JinHuiEric <JinHuiEric.Huang@amd.com>
> Subject: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology
>
> In NPS4 BIOS we need to find the closest numa node when creating topology io link between cpu and gpu, if PCI driver doesn't set it.
>
> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 94 ++++++++++++++++++++++++++-
>   1 file changed, 91 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> index 38d45711675f..57518136c7d7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> @@ -1759,6 +1759,87 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
>   	return 0;
>   }
>   
> +#ifdef CONFIG_ACPI
> +static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev,
> +		int *numa_node)
> +{
> +	struct acpi_table_header *table_header = NULL;
> +	struct acpi_subtable_header *sub_header = NULL;
> +	unsigned long table_end, subtable_len;
> +	u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
> +			pci_dev_id(kdev->pdev);
> +	u32 bdf;
> +	acpi_status status;
> +	struct acpi_srat_cpu_affinity *cpu;
> +	struct acpi_srat_generic_affinity *gpu;
> +	int pxm = 0, max_pxm = 0;
> +	bool found = false;
> +
> +	/* Fetch the SRAT table from ACPI */
> +	status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
> +	if (status == AE_NOT_FOUND) {
> +		pr_warn("SRAT table not found\n");
> +		return;
> +	} else if (ACPI_FAILURE(status)) {
> +		const char *err = acpi_format_exception(status);
> +		pr_err("SRAT table error: %s\n", err);
> +		return;
> +	}
> +
> +	table_end = (unsigned long)table_header + table_header->length;
> +
> +	/* Parse all entries looking for a match. */
> +
> +	sub_header = (struct acpi_subtable_header *)
> +			((unsigned long)table_header +
> +			sizeof(struct acpi_table_srat));
> +	subtable_len = sub_header->length;
> +
> +	while (((unsigned long)sub_header) + subtable_len  < table_end) {
> +		/*
> +		 * If length is 0, break from this loop to avoid
> +		 * infinite loop.
> +		 */
> +		if (subtable_len == 0) {
> +			pr_err("SRAT invalid zero length\n");
> +			break;
> +		}
> +
> +		switch (sub_header->type) {
> +		case ACPI_SRAT_TYPE_CPU_AFFINITY:
> +			cpu = (struct acpi_srat_cpu_affinity *)sub_header;
> +			pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
> +					cpu->proximity_domain_lo;
> +			if (pxm > max_pxm)
> +				max_pxm = pxm;
> +			break;
> +		case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
> +			gpu = (struct acpi_srat_generic_affinity *)sub_header;
> +			bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
> +					*((u16 *)(&gpu->device_handle[2]));
> +			if (bdf == pci_id) {
> +				found = true;
> +				*numa_node = pxm_to_node(gpu->proximity_domain);
> +			}
> +			break;
> +		default:
> +			break;
> +		}
> +
> +		if (found)
> +			break;
> +
> +		sub_header = (struct acpi_subtable_header *)
> +				((unsigned long)sub_header + subtable_len);
> +		subtable_len = sub_header->length;
> +	}
> +
> +	/* workaround bad cpu-gpu binding case */
> +	if (found && (*numa_node < 0 || *numa_node > max_pxm))
> +		*numa_node = 0;
> +}
> +#endif
> +
>   /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
>    * to its NUMA node
>    *	@avail_size: Available size in the memory
> @@ -1774,6 +1855,9 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
>   			uint32_t proximity_domain)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)kdev->kgd;
> +#ifdef CONFIG_NUMA
> +	int numa_node = 0;
> +#endif
>   
>   	*avail_size -= sizeof(struct crat_subtype_iolink);
>   	if (*avail_size < 0)
> @@ -1805,9 +1889,13 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
>   
>   	sub_type_hdr->proximity_domain_from = proximity_domain;  #ifdef CONFIG_NUMA
> -	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
> -		sub_type_hdr->proximity_domain_to = 0;
> -	else
> +	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) { #ifdef CONFIG_ACPI
> +		kfd_find_numa_node_in_srat(kdev, &numa_node); #endif
> +		sub_type_hdr->proximity_domain_to = numa_node;
> +		set_dev_node(&kdev->pdev->dev, numa_node);
> +	} else
>   		sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node;  #else
>   	sub_type_hdr->proximity_domain_to = 0;
> --
> 2.17.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=04%7C01%7Clijo.lazar%40amd.com%7C96808a6aab7b40861eeb08d90a580524%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637552195438132467%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=ipBmGTX%2Fokto1zRuQ8jlDA8p%2B8BOjHZa5WGGKNJszEY%3D&amp;reserved=0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology
  2021-05-04 14:00   ` Eric Huang
@ 2021-05-04 14:30     ` Lazar, Lijo
  2021-05-04 14:35       ` Eric Huang
  0 siblings, 1 reply; 18+ messages in thread
From: Lazar, Lijo @ 2021-05-04 14:30 UTC (permalink / raw)
  To: Huang, JinHuiEric, amd-gfx


[-- Attachment #1.1: Type: text/plain, Size: 7274 bytes --]

[AMD Official Use Only - Internal Distribution Only]

Converting using pxm_to_node and then comparing against pxm value looks a bit odd. Shouldn't the comparsion be between equals - node to node or pxm to pxm?

Thanks,
Lijo
________________________________
From: Huang, JinHuiEric <JinHuiEric.Huang@amd.com>
Sent: Tuesday, May 4, 2021 7:30:44 PM
To: Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology

Like I answer Oak's question,
"For GCD parsing, the relation of GCD to CCD is defined by AMD, generic
parsing in srat.c is considering a GCD as a new numa node which is not
suitable for our need."

GCD's pxm domain will get a wrong numa node which may be bigger than CCD
domains, so I have to do a sanity check to correct it.

Regards,
Eric

On 2021-05-04 3:46 a.m., Lazar, Lijo wrote:
> [AMD Public Use]
>
>> *numa_node > max_pxm
> Why numa node number is compared to a proximity domain? Since you are already using pxm_to_node() API, assume that should take care.
>
> That also will avoid parsing ACPI_SRAT_TYPE_CPU_AFFINITY structs.
>
> Thanks,
> Lijo
>
>
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Eric Huang
> Sent: Wednesday, April 28, 2021 8:42 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Huang, JinHuiEric <JinHuiEric.Huang@amd.com>
> Subject: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology
>
> In NPS4 BIOS we need to find the closest numa node when creating topology io link between cpu and gpu, if PCI driver doesn't set it.
>
> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 94 ++++++++++++++++++++++++++-
>   1 file changed, 91 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> index 38d45711675f..57518136c7d7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> @@ -1759,6 +1759,87 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
>        return 0;
>   }
>
> +#ifdef CONFIG_ACPI
> +static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev,
> +             int *numa_node)
> +{
> +     struct acpi_table_header *table_header = NULL;
> +     struct acpi_subtable_header *sub_header = NULL;
> +     unsigned long table_end, subtable_len;
> +     u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
> +                     pci_dev_id(kdev->pdev);
> +     u32 bdf;
> +     acpi_status status;
> +     struct acpi_srat_cpu_affinity *cpu;
> +     struct acpi_srat_generic_affinity *gpu;
> +     int pxm = 0, max_pxm = 0;
> +     bool found = false;
> +
> +     /* Fetch the SRAT table from ACPI */
> +     status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
> +     if (status == AE_NOT_FOUND) {
> +             pr_warn("SRAT table not found\n");
> +             return;
> +     } else if (ACPI_FAILURE(status)) {
> +             const char *err = acpi_format_exception(status);
> +             pr_err("SRAT table error: %s\n", err);
> +             return;
> +     }
> +
> +     table_end = (unsigned long)table_header + table_header->length;
> +
> +     /* Parse all entries looking for a match. */
> +
> +     sub_header = (struct acpi_subtable_header *)
> +                     ((unsigned long)table_header +
> +                     sizeof(struct acpi_table_srat));
> +     subtable_len = sub_header->length;
> +
> +     while (((unsigned long)sub_header) + subtable_len  < table_end) {
> +             /*
> +              * If length is 0, break from this loop to avoid
> +              * infinite loop.
> +              */
> +             if (subtable_len == 0) {
> +                     pr_err("SRAT invalid zero length\n");
> +                     break;
> +             }
> +
> +             switch (sub_header->type) {
> +             case ACPI_SRAT_TYPE_CPU_AFFINITY:
> +                     cpu = (struct acpi_srat_cpu_affinity *)sub_header;
> +                     pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
> +                                     cpu->proximity_domain_lo;
> +                     if (pxm > max_pxm)
> +                             max_pxm = pxm;
> +                     break;
> +             case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
> +                     gpu = (struct acpi_srat_generic_affinity *)sub_header;
> +                     bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
> +                                     *((u16 *)(&gpu->device_handle[2]));
> +                     if (bdf == pci_id) {
> +                             found = true;
> +                             *numa_node = pxm_to_node(gpu->proximity_domain);
> +                     }
> +                     break;
> +             default:
> +                     break;
> +             }
> +
> +             if (found)
> +                     break;
> +
> +             sub_header = (struct acpi_subtable_header *)
> +                             ((unsigned long)sub_header + subtable_len);
> +             subtable_len = sub_header->length;
> +     }
> +
> +     /* workaround bad cpu-gpu binding case */
> +     if (found && (*numa_node < 0 || *numa_node > max_pxm))
> +             *numa_node = 0;
> +}
> +#endif
> +
>   /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
>    * to its NUMA node
>    *  @avail_size: Available size in the memory
> @@ -1774,6 +1855,9 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
>                        uint32_t proximity_domain)
>   {
>        struct amdgpu_device *adev = (struct amdgpu_device *)kdev->kgd;
> +#ifdef CONFIG_NUMA
> +     int numa_node = 0;
> +#endif
>
>        *avail_size -= sizeof(struct crat_subtype_iolink);
>        if (*avail_size < 0)
> @@ -1805,9 +1889,13 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
>
>        sub_type_hdr->proximity_domain_from = proximity_domain;  #ifdef CONFIG_NUMA
> -     if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
> -             sub_type_hdr->proximity_domain_to = 0;
> -     else
> +     if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) { #ifdef CONFIG_ACPI
> +             kfd_find_numa_node_in_srat(kdev, &numa_node); #endif
> +             sub_type_hdr->proximity_domain_to = numa_node;
> +             set_dev_node(&kdev->pdev->dev, numa_node);
> +     } else
>                sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node;  #else
>        sub_type_hdr->proximity_domain_to = 0;
> --
> 2.17.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=04%7C01%7Clijo.lazar%40amd.com%7C96808a6aab7b40861eeb08d90a580524%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637552195438132467%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=ipBmGTX%2Fokto1zRuQ8jlDA8p%2B8BOjHZa5WGGKNJszEY%3D&amp;reserved=0


[-- Attachment #1.2: Type: text/html, Size: 14912 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology
  2021-05-04 14:30     ` Lazar, Lijo
@ 2021-05-04 14:35       ` Eric Huang
  0 siblings, 0 replies; 18+ messages in thread
From: Eric Huang @ 2021-05-04 14:35 UTC (permalink / raw)
  To: Lazar, Lijo, amd-gfx


[-- Attachment #1.1: Type: text/plain, Size: 8976 bytes --]

Good point. Thanks.

Eric

On 2021-05-04 10:30 a.m., Lazar, Lijo wrote:
>
> [AMD Official Use Only - Internal Distribution Only]
>
>
> Converting using pxm_to_node and then comparing against pxm value 
> looks a bit odd. Shouldn't the comparsion be between equals - node to 
> node or pxm to pxm?
>
> Thanks,
> Lijo
> ------------------------------------------------------------------------
> *From:* Huang, JinHuiEric <JinHuiEric.Huang@amd.com>
> *Sent:* Tuesday, May 4, 2021 7:30:44 PM
> *To:* Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org 
> <amd-gfx@lists.freedesktop.org>
> *Subject:* Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology
> Like I answer Oak's question,
> "For GCD parsing, the relation of GCD to CCD is defined by AMD, generic
> parsing in srat.c is considering a GCD as a new numa node which is not
> suitable for our need."
>
> GCD's pxm domain will get a wrong numa node which may be bigger than CCD
> domains, so I have to do a sanity check to correct it.
>
> Regards,
> Eric
>
> On 2021-05-04 3:46 a.m., Lazar, Lijo wrote:
> > [AMD Public Use]
> >
> >> *numa_node > max_pxm
> > Why numa node number is compared to a proximity domain? Since you 
> are already using pxm_to_node() API, assume that should take care.
> >
> > That also will avoid parsing ACPI_SRAT_TYPE_CPU_AFFINITY structs.
> >
> > Thanks,
> > Lijo
> >
> >
> > -----Original Message-----
> > From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of 
> Eric Huang
> > Sent: Wednesday, April 28, 2021 8:42 PM
> > To: amd-gfx@lists.freedesktop.org
> > Cc: Huang, JinHuiEric <JinHuiEric.Huang@amd.com>
> > Subject: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology
> >
> > In NPS4 BIOS we need to find the closest numa node when creating 
> topology io link between cpu and gpu, if PCI driver doesn't set it.
> >
> > Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
> > ---
> >   drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 94 ++++++++++++++++++++++++++-
> >   1 file changed, 91 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> > index 38d45711675f..57518136c7d7 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> > @@ -1759,6 +1759,87 @@ static int kfd_fill_gpu_memory_affinity(int 
> *avail_size,
> >        return 0;
> >   }
> >
> > +#ifdef CONFIG_ACPI
> > +static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev,
> > +             int *numa_node)
> > +{
> > +     struct acpi_table_header *table_header = NULL;
> > +     struct acpi_subtable_header *sub_header = NULL;
> > +     unsigned long table_end, subtable_len;
> > +     u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
> > +                     pci_dev_id(kdev->pdev);
> > +     u32 bdf;
> > +     acpi_status status;
> > +     struct acpi_srat_cpu_affinity *cpu;
> > +     struct acpi_srat_generic_affinity *gpu;
> > +     int pxm = 0, max_pxm = 0;
> > +     bool found = false;
> > +
> > +     /* Fetch the SRAT table from ACPI */
> > +     status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
> > +     if (status == AE_NOT_FOUND) {
> > +             pr_warn("SRAT table not found\n");
> > +             return;
> > +     } else if (ACPI_FAILURE(status)) {
> > +             const char *err = acpi_format_exception(status);
> > +             pr_err("SRAT table error: %s\n", err);
> > +             return;
> > +     }
> > +
> > +     table_end = (unsigned long)table_header + table_header->length;
> > +
> > +     /* Parse all entries looking for a match. */
> > +
> > +     sub_header = (struct acpi_subtable_header *)
> > +                     ((unsigned long)table_header +
> > +                     sizeof(struct acpi_table_srat));
> > +     subtable_len = sub_header->length;
> > +
> > +     while (((unsigned long)sub_header) + subtable_len  < table_end) {
> > +             /*
> > +              * If length is 0, break from this loop to avoid
> > +              * infinite loop.
> > +              */
> > +             if (subtable_len == 0) {
> > +                     pr_err("SRAT invalid zero length\n");
> > +                     break;
> > +             }
> > +
> > +             switch (sub_header->type) {
> > +             case ACPI_SRAT_TYPE_CPU_AFFINITY:
> > +                     cpu = (struct acpi_srat_cpu_affinity *)sub_header;
> > +                     pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
> > + cpu->proximity_domain_lo;
> > +                     if (pxm > max_pxm)
> > +                             max_pxm = pxm;
> > +                     break;
> > +             case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
> > +                     gpu = (struct acpi_srat_generic_affinity 
> *)sub_header;
> > +                     bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
> > +                                     *((u16 
> *)(&gpu->device_handle[2]));
> > +                     if (bdf == pci_id) {
> > +                             found = true;
> > +                             *numa_node = 
> pxm_to_node(gpu->proximity_domain);
> > +                     }
> > +                     break;
> > +             default:
> > +                     break;
> > +             }
> > +
> > +             if (found)
> > +                     break;
> > +
> > +             sub_header = (struct acpi_subtable_header *)
> > +                             ((unsigned long)sub_header + 
> subtable_len);
> > +             subtable_len = sub_header->length;
> > +     }
> > +
> > +     /* workaround bad cpu-gpu binding case */
> > +     if (found && (*numa_node < 0 || *numa_node > max_pxm))
> > +             *numa_node = 0;
> > +}
> > +#endif
> > +
> >   /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
> >    * to its NUMA node
> >    *  @avail_size: Available size in the memory
> > @@ -1774,6 +1855,9 @@ static int 
> kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
> >                        uint32_t proximity_domain)
> >   {
> >        struct amdgpu_device *adev = (struct amdgpu_device *)kdev->kgd;
> > +#ifdef CONFIG_NUMA
> > +     int numa_node = 0;
> > +#endif
> >
> >        *avail_size -= sizeof(struct crat_subtype_iolink);
> >        if (*avail_size < 0)
> > @@ -1805,9 +1889,13 @@ static int 
> kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
> >
> >        sub_type_hdr->proximity_domain_from = proximity_domain;  
> #ifdef CONFIG_NUMA
> > -     if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
> > -             sub_type_hdr->proximity_domain_to = 0;
> > -     else
> > +     if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) { #ifdef 
> CONFIG_ACPI
> > +             kfd_find_numa_node_in_srat(kdev, &numa_node); #endif
> > +             sub_type_hdr->proximity_domain_to = numa_node;
> > + set_dev_node(&kdev->pdev->dev, numa_node);
> > +     } else
> >                sub_type_hdr->proximity_domain_to = 
> kdev->pdev->dev.numa_node;  #else
> >        sub_type_hdr->proximity_domain_to = 0;
> > --
> > 2.17.1
> >
> > _______________________________________________
> > amd-gfx mailing list
> > amd-gfx@lists.freedesktop.org
> > 
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=04%7C01%7Clijo.lazar%40amd.com%7C96808a6aab7b40861eeb08d90a580524%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637552195438132467%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=ipBmGTX%2Fokto1zRuQ8jlDA8p%2B8BOjHZa5WGGKNJszEY%3D&amp;reserved=0 
> <https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=04%7C01%7Clijo.lazar%40amd.com%7C96808a6aab7b40861eeb08d90a580524%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637552195438132467%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=ipBmGTX%2Fokto1zRuQ8jlDA8p%2B8BOjHZa5WGGKNJszEY%3D&amp;reserved=0>
>


[-- Attachment #1.2: Type: text/html, Size: 19453 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology
  2021-05-05 13:51 Eric Huang
@ 2021-05-05 20:27 ` Felix Kuehling
  0 siblings, 0 replies; 18+ messages in thread
From: Felix Kuehling @ 2021-05-05 20:27 UTC (permalink / raw)
  To: Eric Huang, amd-gfx

Am 2021-05-05 um 9:51 a.m. schrieb Eric Huang:
> In NPS4 BIOS we need to find the closest numa node when creating
> topology io link between cpu and gpu, if PCI driver doesn't set
> it.
>
> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>


> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 91 +++++++++++++++++++++++++++
>  1 file changed, 91 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> index 38d45711675f..0972b1014d6f 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> @@ -1759,6 +1759,92 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
>  	return 0;
>  }
>  
> +#ifdef CONFIG_ACPI_NUMA
> +static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
> +{
> +	struct acpi_table_header *table_header = NULL;
> +	struct acpi_subtable_header *sub_header = NULL;
> +	unsigned long table_end, subtable_len;
> +	u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
> +			pci_dev_id(kdev->pdev);
> +	u32 bdf;
> +	acpi_status status;
> +	struct acpi_srat_cpu_affinity *cpu;
> +	struct acpi_srat_generic_affinity *gpu;
> +	int pxm = 0, max_pxm = 0;
> +	int numa_node = NUMA_NO_NODE;
> +	bool found = false;
> +
> +	/* Fetch the SRAT table from ACPI */
> +	status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
> +	if (status == AE_NOT_FOUND) {
> +		pr_warn("SRAT table not found\n");
> +		return;
> +	} else if (ACPI_FAILURE(status)) {
> +		const char *err = acpi_format_exception(status);
> +		pr_err("SRAT table error: %s\n", err);
> +		return;
> +	}
> +
> +	table_end = (unsigned long)table_header + table_header->length;
> +
> +	/* Parse all entries looking for a match. */
> +	sub_header = (struct acpi_subtable_header *)
> +			((unsigned long)table_header +
> +			sizeof(struct acpi_table_srat));
> +	subtable_len = sub_header->length;
> +
> +	while (((unsigned long)sub_header) + subtable_len  < table_end) {
> +		/*
> +		 * If length is 0, break from this loop to avoid
> +		 * infinite loop.
> +		 */
> +		if (subtable_len == 0) {
> +			pr_err("SRAT invalid zero length\n");
> +			break;
> +		}
> +
> +		switch (sub_header->type) {
> +		case ACPI_SRAT_TYPE_CPU_AFFINITY:
> +			cpu = (struct acpi_srat_cpu_affinity *)sub_header;
> +			pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
> +					cpu->proximity_domain_lo;
> +			if (pxm > max_pxm)
> +				max_pxm = pxm;
> +			break;
> +		case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
> +			gpu = (struct acpi_srat_generic_affinity *)sub_header;
> +			bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
> +					*((u16 *)(&gpu->device_handle[2]));
> +			if (bdf == pci_id) {
> +				found = true;
> +				numa_node = pxm_to_node(gpu->proximity_domain);
> +			}
> +			break;
> +		default:
> +			break;
> +		}
> +
> +		if (found)
> +			break;
> +
> +		sub_header = (struct acpi_subtable_header *)
> +				((unsigned long)sub_header + subtable_len);
> +		subtable_len = sub_header->length;
> +	}
> +
> +	acpi_put_table(table_header);
> +
> +	/* Workaround bad cpu-gpu binding case */
> +	if (found && (numa_node < 0 ||
> +			numa_node > pxm_to_node(max_pxm)))
> +		numa_node = 0;
> +
> +	if (numa_node != NUMA_NO_NODE)
> +		set_dev_node(&kdev->pdev->dev, numa_node);
> +}
> +#endif
> +
>  /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
>   * to its NUMA node
>   *	@avail_size: Available size in the memory
> @@ -1804,6 +1890,11 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
>  	}
>  
>  	sub_type_hdr->proximity_domain_from = proximity_domain;
> +
> +#ifdef CONFIG_ACPI_NUMA
> +	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
> +		kfd_find_numa_node_in_srat(kdev);
> +#endif
>  #ifdef CONFIG_NUMA
>  	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
>  		sub_type_hdr->proximity_domain_to = 0;
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology
@ 2021-05-05 13:51 Eric Huang
  2021-05-05 20:27 ` Felix Kuehling
  0 siblings, 1 reply; 18+ messages in thread
From: Eric Huang @ 2021-05-05 13:51 UTC (permalink / raw)
  To: amd-gfx; +Cc: Eric Huang, felix.kuehling

In NPS4 BIOS we need to find the closest numa node when creating
topology io link between cpu and gpu, if PCI driver doesn't set
it.

Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 91 +++++++++++++++++++++++++++
 1 file changed, 91 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 38d45711675f..0972b1014d6f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1759,6 +1759,92 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
 	return 0;
 }
 
+#ifdef CONFIG_ACPI_NUMA
+static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
+{
+	struct acpi_table_header *table_header = NULL;
+	struct acpi_subtable_header *sub_header = NULL;
+	unsigned long table_end, subtable_len;
+	u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
+			pci_dev_id(kdev->pdev);
+	u32 bdf;
+	acpi_status status;
+	struct acpi_srat_cpu_affinity *cpu;
+	struct acpi_srat_generic_affinity *gpu;
+	int pxm = 0, max_pxm = 0;
+	int numa_node = NUMA_NO_NODE;
+	bool found = false;
+
+	/* Fetch the SRAT table from ACPI */
+	status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
+	if (status == AE_NOT_FOUND) {
+		pr_warn("SRAT table not found\n");
+		return;
+	} else if (ACPI_FAILURE(status)) {
+		const char *err = acpi_format_exception(status);
+		pr_err("SRAT table error: %s\n", err);
+		return;
+	}
+
+	table_end = (unsigned long)table_header + table_header->length;
+
+	/* Parse all entries looking for a match. */
+	sub_header = (struct acpi_subtable_header *)
+			((unsigned long)table_header +
+			sizeof(struct acpi_table_srat));
+	subtable_len = sub_header->length;
+
+	while (((unsigned long)sub_header) + subtable_len  < table_end) {
+		/*
+		 * If length is 0, break from this loop to avoid
+		 * infinite loop.
+		 */
+		if (subtable_len == 0) {
+			pr_err("SRAT invalid zero length\n");
+			break;
+		}
+
+		switch (sub_header->type) {
+		case ACPI_SRAT_TYPE_CPU_AFFINITY:
+			cpu = (struct acpi_srat_cpu_affinity *)sub_header;
+			pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
+					cpu->proximity_domain_lo;
+			if (pxm > max_pxm)
+				max_pxm = pxm;
+			break;
+		case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
+			gpu = (struct acpi_srat_generic_affinity *)sub_header;
+			bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
+					*((u16 *)(&gpu->device_handle[2]));
+			if (bdf == pci_id) {
+				found = true;
+				numa_node = pxm_to_node(gpu->proximity_domain);
+			}
+			break;
+		default:
+			break;
+		}
+
+		if (found)
+			break;
+
+		sub_header = (struct acpi_subtable_header *)
+				((unsigned long)sub_header + subtable_len);
+		subtable_len = sub_header->length;
+	}
+
+	acpi_put_table(table_header);
+
+	/* Workaround bad cpu-gpu binding case */
+	if (found && (numa_node < 0 ||
+			numa_node > pxm_to_node(max_pxm)))
+		numa_node = 0;
+
+	if (numa_node != NUMA_NO_NODE)
+		set_dev_node(&kdev->pdev->dev, numa_node);
+}
+#endif
+
 /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
  * to its NUMA node
  *	@avail_size: Available size in the memory
@@ -1804,6 +1890,11 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
 	}
 
 	sub_type_hdr->proximity_domain_from = proximity_domain;
+
+#ifdef CONFIG_ACPI_NUMA
+	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
+		kfd_find_numa_node_in_srat(kdev);
+#endif
 #ifdef CONFIG_NUMA
 	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
 		sub_type_hdr->proximity_domain_to = 0;
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology
  2021-05-03 19:34     ` Felix Kuehling
@ 2021-05-05 13:36       ` Eric Huang
  0 siblings, 0 replies; 18+ messages in thread
From: Eric Huang @ 2021-05-05 13:36 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx



On 2021-05-03 3:34 p.m., Felix Kuehling wrote:
> Am 2021-05-03 um 3:27 p.m. schrieb Eric Huang:
>>
>> On 2021-05-03 3:13 p.m., Felix Kuehling wrote:
>>> Am 2021-05-03 um 10:47 a.m. schrieb Eric Huang:
>>>> In NPS4 BIOS we need to find the closest numa node when creating
>>>> topology io link between cpu and gpu, if PCI driver doesn't set
>>>> it.
>>>>
>>>> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
>>>> ---
>>>>    drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 95
>>>> ++++++++++++++++++++++++++-
>>>>    1 file changed, 93 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>>>> index 38d45711675f..58c6738de774 100644
>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>>>> @@ -1759,6 +1759,91 @@ static int kfd_fill_gpu_memory_affinity(int
>>>> *avail_size,
>>>>        return 0;
>>>>    }
>>>>    +#ifdef CONFIG_ACPI_NUMA
>>>> +static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
>>>> +{
>>>> +    struct acpi_table_header *table_header = NULL;
>>>> +    struct acpi_subtable_header *sub_header = NULL;
>>>> +    unsigned long table_end, subtable_len;
>>>> +    u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
>>>> +            pci_dev_id(kdev->pdev);
>>>> +    u32 bdf;
>>>> +    acpi_status status;
>>>> +    struct acpi_srat_cpu_affinity *cpu;
>>>> +    struct acpi_srat_generic_affinity *gpu;
>>>> +    int pxm = 0, max_pxm = 0;
>>>> +    int numa_node = NUMA_NO_NODE;
>>>> +    bool found = false;
>>>> +
>>>> +    /* Fetch the SRAT table from ACPI */
>>>> +    status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
>>>> +    if (status == AE_NOT_FOUND) {
>>>> +        pr_warn("SRAT table not found\n");
>>>> +        return;
>>>> +    } else if (ACPI_FAILURE(status)) {
>>>> +        const char *err = acpi_format_exception(status);
>>>> +        pr_err("SRAT table error: %s\n", err);
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    table_end = (unsigned long)table_header + table_header->length;
>>>> +
>>>> +    /* Parse all entries looking for a match. */
>>>> +    sub_header = (struct acpi_subtable_header *)
>>>> +            ((unsigned long)table_header +
>>>> +            sizeof(struct acpi_table_srat));
>>>> +    subtable_len = sub_header->length;
>>>> +
>>>> +    while (((unsigned long)sub_header) + subtable_len  < table_end) {
>>>> +        /*
>>>> +         * If length is 0, break from this loop to avoid
>>>> +         * infinite loop.
>>>> +         */
>>>> +        if (subtable_len == 0) {
>>>> +            pr_err("SRAT invalid zero length\n");
>>>> +            break;
>>>> +        }
>>>> +
>>>> +        switch (sub_header->type) {
>>>> +        case ACPI_SRAT_TYPE_CPU_AFFINITY:
>>>> +            cpu = (struct acpi_srat_cpu_affinity *)sub_header;
>>>> +            pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
>>>> +                    cpu->proximity_domain_lo;
>>>> +            if (pxm > max_pxm)
>>>> +                max_pxm = pxm;
>>>> +            break;
>>>> +        case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
>>>> +            gpu = (struct acpi_srat_generic_affinity *)sub_header;
>>>> +            bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
>>>> +                    *((u16 *)(&gpu->device_handle[2]));
>>>> +            if (bdf == pci_id) {
>>>> +                found = true;
>>>> +                numa_node = pxm_to_node(gpu->proximity_domain);
>>>> +            }
>>>> +            break;
>>>> +        default:
>>>> +            break;
>>>> +        }
>>>> +
>>>> +        if (found)
>>>> +            break;
>>>> +
>>>> +        sub_header = (struct acpi_subtable_header *)
>>>> +                ((unsigned long)sub_header + subtable_len);
>>>> +        subtable_len = sub_header->length;
>>>> +    }
>>>> +
>>>> +    acpi_put_table(table_header);
>>>> +
>>>> +    /* Workaround bad cpu-gpu binding case */
>>>> +    if (found && (numa_node < 0 || numa_node > max_pxm))
>>>> +        numa_node = 0;
>>>> +
>>>> +    if (numa_node != NUMA_NO_NODE)
>>>> +        set_dev_node(&kdev->pdev->dev, numa_node);
>>>> +}
>>>> +#endif
>>>> +
>>>>    /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
>>>>     * to its NUMA node
>>>>     *    @avail_size: Available size in the memory
>>>> @@ -1804,10 +1889,16 @@ static int
>>>> kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
>>>>        }
>>>>          sub_type_hdr->proximity_domain_from = proximity_domain;
>>>> -#ifdef CONFIG_NUMA
>>>> +
>>>> +#ifdef CONFIG_ACPI_NUMA
>>>>        if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
>>>> +        kfd_find_numa_node_in_srat(kdev);
>>>> +#endif
>>>> +#ifdef CONFIG_NUMA
>>>> +    if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) {
>>>>            sub_type_hdr->proximity_domain_to = 0;
>>>> -    else
>>>> +        set_dev_node(&kdev->pdev->dev, 0);
>>> This should not be here. If you really want to lie about the NUMA node
>>> and pretend that it's 0 and not NO_NODE, then that should be done in
>>> kfd_find_numa_node_in_srat. That should be the only function that
>>> changes the dev->numa_node. Like Oak pointed out, eventually that should
>>> maybe not even be part of the driver. But I'm OK with keeping it as a
>>> fallback in the driver for the case that one a GPU doesn't have a NUMA
>>> node assigned by the kernel.
>>>
>>> But is setting the dev->numa_node to 0 really necessary? Does anything
>>> else in KFD depend on the dev->numa_node being 0? This function is only
>>> supposed to fill the proximity_domain in the CRAT table. Setting
>>> dev->numa_node is a side effect. If we can't figure out the correct NUMA
>>> node, we shouldn't just guess 0 in a way that potentially affects other
>>> parts of the kernel.
>>>
>>> Regards,
>>>     Felix
>>>
>> The reason I am adding it is for
>> http://ontrack-internal.amd.com/browse/SWDEV-281376.
>>
>> RCCL is using /sys/class/drm/card0/device/numa_node to determine numa
>> node which GPU is close to. To keep consistence between KFD topology
>> and pci sysfs exposure, I add it as a workaround in NPS1 and ACPI is
>> not configured.
> I think that's OK if you find a valid NUMA node. But filling in 0 is
> clearly incorrect when the ACPI tables don't provide that information.
> The value -1 is there for just this case where no information is
> available. Tools using the device/numa_node interface in sysfs need to
> be prepared for this. We can pretend the proximity domain is 0 in the
> KFD topology. But we shouldn't do that in the system NUMA topology
> because that changes the semantics of well established kernel interfaces
> outside our driver. It can affect tools outside of ROCm in unexpected ways.
>
> Why does RCCL look at the device/numa_node at all? Could it use the
> proximity domain from the KFD topology instead?
I agree with you. Actually it only affects few corner cases, such as 
ACPI is not configured and wrong GCD entries in NPS1. After correct SRAT 
table is released with BIOS, it totally useless.

Thanks,
Eric
> Regards,
>    Felix
>
>> Thanks,
>> Eric
>>
>>>> +    } else
>>>>            sub_type_hdr->proximity_domain_to =
>>>> kdev->pdev->dev.numa_node;
>>>>    #else
>>>>        sub_type_hdr->proximity_domain_to = 0;

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology
  2021-05-03 19:27   ` Eric Huang
@ 2021-05-03 19:34     ` Felix Kuehling
  2021-05-05 13:36       ` Eric Huang
  0 siblings, 1 reply; 18+ messages in thread
From: Felix Kuehling @ 2021-05-03 19:34 UTC (permalink / raw)
  To: Eric Huang, amd-gfx

Am 2021-05-03 um 3:27 p.m. schrieb Eric Huang:
>
>
> On 2021-05-03 3:13 p.m., Felix Kuehling wrote:
>> Am 2021-05-03 um 10:47 a.m. schrieb Eric Huang:
>>> In NPS4 BIOS we need to find the closest numa node when creating
>>> topology io link between cpu and gpu, if PCI driver doesn't set
>>> it.
>>>
>>> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 95
>>> ++++++++++++++++++++++++++-
>>>   1 file changed, 93 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>>> index 38d45711675f..58c6738de774 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>>> @@ -1759,6 +1759,91 @@ static int kfd_fill_gpu_memory_affinity(int
>>> *avail_size,
>>>       return 0;
>>>   }
>>>   +#ifdef CONFIG_ACPI_NUMA
>>> +static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
>>> +{
>>> +    struct acpi_table_header *table_header = NULL;
>>> +    struct acpi_subtable_header *sub_header = NULL;
>>> +    unsigned long table_end, subtable_len;
>>> +    u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
>>> +            pci_dev_id(kdev->pdev);
>>> +    u32 bdf;
>>> +    acpi_status status;
>>> +    struct acpi_srat_cpu_affinity *cpu;
>>> +    struct acpi_srat_generic_affinity *gpu;
>>> +    int pxm = 0, max_pxm = 0;
>>> +    int numa_node = NUMA_NO_NODE;
>>> +    bool found = false;
>>> +
>>> +    /* Fetch the SRAT table from ACPI */
>>> +    status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
>>> +    if (status == AE_NOT_FOUND) {
>>> +        pr_warn("SRAT table not found\n");
>>> +        return;
>>> +    } else if (ACPI_FAILURE(status)) {
>>> +        const char *err = acpi_format_exception(status);
>>> +        pr_err("SRAT table error: %s\n", err);
>>> +        return;
>>> +    }
>>> +
>>> +    table_end = (unsigned long)table_header + table_header->length;
>>> +
>>> +    /* Parse all entries looking for a match. */
>>> +    sub_header = (struct acpi_subtable_header *)
>>> +            ((unsigned long)table_header +
>>> +            sizeof(struct acpi_table_srat));
>>> +    subtable_len = sub_header->length;
>>> +
>>> +    while (((unsigned long)sub_header) + subtable_len  < table_end) {
>>> +        /*
>>> +         * If length is 0, break from this loop to avoid
>>> +         * infinite loop.
>>> +         */
>>> +        if (subtable_len == 0) {
>>> +            pr_err("SRAT invalid zero length\n");
>>> +            break;
>>> +        }
>>> +
>>> +        switch (sub_header->type) {
>>> +        case ACPI_SRAT_TYPE_CPU_AFFINITY:
>>> +            cpu = (struct acpi_srat_cpu_affinity *)sub_header;
>>> +            pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
>>> +                    cpu->proximity_domain_lo;
>>> +            if (pxm > max_pxm)
>>> +                max_pxm = pxm;
>>> +            break;
>>> +        case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
>>> +            gpu = (struct acpi_srat_generic_affinity *)sub_header;
>>> +            bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
>>> +                    *((u16 *)(&gpu->device_handle[2]));
>>> +            if (bdf == pci_id) {
>>> +                found = true;
>>> +                numa_node = pxm_to_node(gpu->proximity_domain);
>>> +            }
>>> +            break;
>>> +        default:
>>> +            break;
>>> +        }
>>> +
>>> +        if (found)
>>> +            break;
>>> +
>>> +        sub_header = (struct acpi_subtable_header *)
>>> +                ((unsigned long)sub_header + subtable_len);
>>> +        subtable_len = sub_header->length;
>>> +    }
>>> +
>>> +    acpi_put_table(table_header);
>>> +
>>> +    /* Workaround bad cpu-gpu binding case */
>>> +    if (found && (numa_node < 0 || numa_node > max_pxm))
>>> +        numa_node = 0;
>>> +
>>> +    if (numa_node != NUMA_NO_NODE)
>>> +        set_dev_node(&kdev->pdev->dev, numa_node);
>>> +}
>>> +#endif
>>> +
>>>   /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
>>>    * to its NUMA node
>>>    *    @avail_size: Available size in the memory
>>> @@ -1804,10 +1889,16 @@ static int
>>> kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
>>>       }
>>>         sub_type_hdr->proximity_domain_from = proximity_domain;
>>> -#ifdef CONFIG_NUMA
>>> +
>>> +#ifdef CONFIG_ACPI_NUMA
>>>       if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
>>> +        kfd_find_numa_node_in_srat(kdev);
>>> +#endif
>>> +#ifdef CONFIG_NUMA
>>> +    if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) {
>>>           sub_type_hdr->proximity_domain_to = 0;
>>> -    else
>>> +        set_dev_node(&kdev->pdev->dev, 0);
>> This should not be here. If you really want to lie about the NUMA node
>> and pretend that it's 0 and not NO_NODE, then that should be done in
>> kfd_find_numa_node_in_srat. That should be the only function that
>> changes the dev->numa_node. Like Oak pointed out, eventually that should
>> maybe not even be part of the driver. But I'm OK with keeping it as a
>> fallback in the driver for the case that one a GPU doesn't have a NUMA
>> node assigned by the kernel.
>>
>> But is setting the dev->numa_node to 0 really necessary? Does anything
>> else in KFD depend on the dev->numa_node being 0? This function is only
>> supposed to fill the proximity_domain in the CRAT table. Setting
>> dev->numa_node is a side effect. If we can't figure out the correct NUMA
>> node, we shouldn't just guess 0 in a way that potentially affects other
>> parts of the kernel.
>>
>> Regards,
>>    Felix
>>
> The reason I am adding it is for
> http://ontrack-internal.amd.com/browse/SWDEV-281376.
>
> RCCL is using /sys/class/drm/card0/device/numa_node to determine numa
> node which GPU is close to. To keep consistence between KFD topology
> and pci sysfs exposure, I add it as a workaround in NPS1 and ACPI is
> not configured.

I think that's OK if you find a valid NUMA node. But filling in 0 is
clearly incorrect when the ACPI tables don't provide that information.
The value -1 is there for just this case where no information is
available. Tools using the device/numa_node interface in sysfs need to
be prepared for this. We can pretend the proximity domain is 0 in the
KFD topology. But we shouldn't do that in the system NUMA topology
because that changes the semantics of well established kernel interfaces
outside our driver. It can affect tools outside of ROCm in unexpected ways.

Why does RCCL look at the device/numa_node at all? Could it use the
proximity domain from the KFD topology instead?

Regards,
  Felix

>
> Thanks,
> Eric
>
>>> +    } else
>>>           sub_type_hdr->proximity_domain_to =
>>> kdev->pdev->dev.numa_node;
>>>   #else
>>>       sub_type_hdr->proximity_domain_to = 0;
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology
  2021-05-03 19:13 ` Felix Kuehling
@ 2021-05-03 19:27   ` Eric Huang
  2021-05-03 19:34     ` Felix Kuehling
  0 siblings, 1 reply; 18+ messages in thread
From: Eric Huang @ 2021-05-03 19:27 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx



On 2021-05-03 3:13 p.m., Felix Kuehling wrote:
> Am 2021-05-03 um 10:47 a.m. schrieb Eric Huang:
>> In NPS4 BIOS we need to find the closest numa node when creating
>> topology io link between cpu and gpu, if PCI driver doesn't set
>> it.
>>
>> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 95 ++++++++++++++++++++++++++-
>>   1 file changed, 93 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>> index 38d45711675f..58c6738de774 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>> @@ -1759,6 +1759,91 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
>>   	return 0;
>>   }
>>   
>> +#ifdef CONFIG_ACPI_NUMA
>> +static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
>> +{
>> +	struct acpi_table_header *table_header = NULL;
>> +	struct acpi_subtable_header *sub_header = NULL;
>> +	unsigned long table_end, subtable_len;
>> +	u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
>> +			pci_dev_id(kdev->pdev);
>> +	u32 bdf;
>> +	acpi_status status;
>> +	struct acpi_srat_cpu_affinity *cpu;
>> +	struct acpi_srat_generic_affinity *gpu;
>> +	int pxm = 0, max_pxm = 0;
>> +	int numa_node = NUMA_NO_NODE;
>> +	bool found = false;
>> +
>> +	/* Fetch the SRAT table from ACPI */
>> +	status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
>> +	if (status == AE_NOT_FOUND) {
>> +		pr_warn("SRAT table not found\n");
>> +		return;
>> +	} else if (ACPI_FAILURE(status)) {
>> +		const char *err = acpi_format_exception(status);
>> +		pr_err("SRAT table error: %s\n", err);
>> +		return;
>> +	}
>> +
>> +	table_end = (unsigned long)table_header + table_header->length;
>> +
>> +	/* Parse all entries looking for a match. */
>> +	sub_header = (struct acpi_subtable_header *)
>> +			((unsigned long)table_header +
>> +			sizeof(struct acpi_table_srat));
>> +	subtable_len = sub_header->length;
>> +
>> +	while (((unsigned long)sub_header) + subtable_len  < table_end) {
>> +		/*
>> +		 * If length is 0, break from this loop to avoid
>> +		 * infinite loop.
>> +		 */
>> +		if (subtable_len == 0) {
>> +			pr_err("SRAT invalid zero length\n");
>> +			break;
>> +		}
>> +
>> +		switch (sub_header->type) {
>> +		case ACPI_SRAT_TYPE_CPU_AFFINITY:
>> +			cpu = (struct acpi_srat_cpu_affinity *)sub_header;
>> +			pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
>> +					cpu->proximity_domain_lo;
>> +			if (pxm > max_pxm)
>> +				max_pxm = pxm;
>> +			break;
>> +		case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
>> +			gpu = (struct acpi_srat_generic_affinity *)sub_header;
>> +			bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
>> +					*((u16 *)(&gpu->device_handle[2]));
>> +			if (bdf == pci_id) {
>> +				found = true;
>> +				numa_node = pxm_to_node(gpu->proximity_domain);
>> +			}
>> +			break;
>> +		default:
>> +			break;
>> +		}
>> +
>> +		if (found)
>> +			break;
>> +
>> +		sub_header = (struct acpi_subtable_header *)
>> +				((unsigned long)sub_header + subtable_len);
>> +		subtable_len = sub_header->length;
>> +	}
>> +
>> +	acpi_put_table(table_header);
>> +
>> +	/* Workaround bad cpu-gpu binding case */
>> +	if (found && (numa_node < 0 || numa_node > max_pxm))
>> +		numa_node = 0;
>> +
>> +	if (numa_node != NUMA_NO_NODE)
>> +		set_dev_node(&kdev->pdev->dev, numa_node);
>> +}
>> +#endif
>> +
>>   /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
>>    * to its NUMA node
>>    *	@avail_size: Available size in the memory
>> @@ -1804,10 +1889,16 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
>>   	}
>>   
>>   	sub_type_hdr->proximity_domain_from = proximity_domain;
>> -#ifdef CONFIG_NUMA
>> +
>> +#ifdef CONFIG_ACPI_NUMA
>>   	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
>> +		kfd_find_numa_node_in_srat(kdev);
>> +#endif
>> +#ifdef CONFIG_NUMA
>> +	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) {
>>   		sub_type_hdr->proximity_domain_to = 0;
>> -	else
>> +		set_dev_node(&kdev->pdev->dev, 0);
> This should not be here. If you really want to lie about the NUMA node
> and pretend that it's 0 and not NO_NODE, then that should be done in
> kfd_find_numa_node_in_srat. That should be the only function that
> changes the dev->numa_node. Like Oak pointed out, eventually that should
> maybe not even be part of the driver. But I'm OK with keeping it as a
> fallback in the driver for the case that one a GPU doesn't have a NUMA
> node assigned by the kernel.
>
> But is setting the dev->numa_node to 0 really necessary? Does anything
> else in KFD depend on the dev->numa_node being 0? This function is only
> supposed to fill the proximity_domain in the CRAT table. Setting
> dev->numa_node is a side effect. If we can't figure out the correct NUMA
> node, we shouldn't just guess 0 in a way that potentially affects other
> parts of the kernel.
>
> Regards,
>    Felix
>
The reason I am adding it is for 
http://ontrack-internal.amd.com/browse/SWDEV-281376.

RCCL is using /sys/class/drm/card0/device/numa_node to determine numa 
node which GPU is close to. To keep consistence between KFD topology and 
pci sysfs exposure, I add it as a workaround in NPS1 and ACPI is not 
configured.

Thanks,
Eric

>> +	} else
>>   		sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node;
>>   #else
>>   	sub_type_hdr->proximity_domain_to = 0;

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology
  2021-05-03 14:47 Eric Huang
@ 2021-05-03 19:13 ` Felix Kuehling
  2021-05-03 19:27   ` Eric Huang
  0 siblings, 1 reply; 18+ messages in thread
From: Felix Kuehling @ 2021-05-03 19:13 UTC (permalink / raw)
  To: Eric Huang, amd-gfx

Am 2021-05-03 um 10:47 a.m. schrieb Eric Huang:
> In NPS4 BIOS we need to find the closest numa node when creating
> topology io link between cpu and gpu, if PCI driver doesn't set
> it.
>
> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 95 ++++++++++++++++++++++++++-
>  1 file changed, 93 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> index 38d45711675f..58c6738de774 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> @@ -1759,6 +1759,91 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
>  	return 0;
>  }
>  
> +#ifdef CONFIG_ACPI_NUMA
> +static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
> +{
> +	struct acpi_table_header *table_header = NULL;
> +	struct acpi_subtable_header *sub_header = NULL;
> +	unsigned long table_end, subtable_len;
> +	u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
> +			pci_dev_id(kdev->pdev);
> +	u32 bdf;
> +	acpi_status status;
> +	struct acpi_srat_cpu_affinity *cpu;
> +	struct acpi_srat_generic_affinity *gpu;
> +	int pxm = 0, max_pxm = 0;
> +	int numa_node = NUMA_NO_NODE;
> +	bool found = false;
> +
> +	/* Fetch the SRAT table from ACPI */
> +	status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
> +	if (status == AE_NOT_FOUND) {
> +		pr_warn("SRAT table not found\n");
> +		return;
> +	} else if (ACPI_FAILURE(status)) {
> +		const char *err = acpi_format_exception(status);
> +		pr_err("SRAT table error: %s\n", err);
> +		return;
> +	}
> +
> +	table_end = (unsigned long)table_header + table_header->length;
> +
> +	/* Parse all entries looking for a match. */
> +	sub_header = (struct acpi_subtable_header *)
> +			((unsigned long)table_header +
> +			sizeof(struct acpi_table_srat));
> +	subtable_len = sub_header->length;
> +
> +	while (((unsigned long)sub_header) + subtable_len  < table_end) {
> +		/*
> +		 * If length is 0, break from this loop to avoid
> +		 * infinite loop.
> +		 */
> +		if (subtable_len == 0) {
> +			pr_err("SRAT invalid zero length\n");
> +			break;
> +		}
> +
> +		switch (sub_header->type) {
> +		case ACPI_SRAT_TYPE_CPU_AFFINITY:
> +			cpu = (struct acpi_srat_cpu_affinity *)sub_header;
> +			pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
> +					cpu->proximity_domain_lo;
> +			if (pxm > max_pxm)
> +				max_pxm = pxm;
> +			break;
> +		case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
> +			gpu = (struct acpi_srat_generic_affinity *)sub_header;
> +			bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
> +					*((u16 *)(&gpu->device_handle[2]));
> +			if (bdf == pci_id) {
> +				found = true;
> +				numa_node = pxm_to_node(gpu->proximity_domain);
> +			}
> +			break;
> +		default:
> +			break;
> +		}
> +
> +		if (found)
> +			break;
> +
> +		sub_header = (struct acpi_subtable_header *)
> +				((unsigned long)sub_header + subtable_len);
> +		subtable_len = sub_header->length;
> +	}
> +
> +	acpi_put_table(table_header);
> +
> +	/* Workaround bad cpu-gpu binding case */
> +	if (found && (numa_node < 0 || numa_node > max_pxm))
> +		numa_node = 0;
> +
> +	if (numa_node != NUMA_NO_NODE)
> +		set_dev_node(&kdev->pdev->dev, numa_node);
> +}
> +#endif
> +
>  /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
>   * to its NUMA node
>   *	@avail_size: Available size in the memory
> @@ -1804,10 +1889,16 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
>  	}
>  
>  	sub_type_hdr->proximity_domain_from = proximity_domain;
> -#ifdef CONFIG_NUMA
> +
> +#ifdef CONFIG_ACPI_NUMA
>  	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
> +		kfd_find_numa_node_in_srat(kdev);
> +#endif
> +#ifdef CONFIG_NUMA
> +	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) {
>  		sub_type_hdr->proximity_domain_to = 0;
> -	else
> +		set_dev_node(&kdev->pdev->dev, 0);

This should not be here. If you really want to lie about the NUMA node
and pretend that it's 0 and not NO_NODE, then that should be done in
kfd_find_numa_node_in_srat. That should be the only function that
changes the dev->numa_node. Like Oak pointed out, eventually that should
maybe not even be part of the driver. But I'm OK with keeping it as a
fallback in the driver for the case that one a GPU doesn't have a NUMA
node assigned by the kernel.

But is setting the dev->numa_node to 0 really necessary? Does anything
else in KFD depend on the dev->numa_node being 0? This function is only
supposed to fill the proximity_domain in the CRAT table. Setting
dev->numa_node is a side effect. If we can't figure out the correct NUMA
node, we shouldn't just guess 0 in a way that potentially affects other
parts of the kernel.

Regards,
  Felix


> +	} else
>  		sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node;
>  #else
>  	sub_type_hdr->proximity_domain_to = 0;
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology
@ 2021-05-03 14:47 Eric Huang
  2021-05-03 19:13 ` Felix Kuehling
  0 siblings, 1 reply; 18+ messages in thread
From: Eric Huang @ 2021-05-03 14:47 UTC (permalink / raw)
  To: amd-gfx; +Cc: Eric Huang, felix.kuehling

In NPS4 BIOS we need to find the closest numa node when creating
topology io link between cpu and gpu, if PCI driver doesn't set
it.

Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 95 ++++++++++++++++++++++++++-
 1 file changed, 93 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 38d45711675f..58c6738de774 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1759,6 +1759,91 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
 	return 0;
 }
 
+#ifdef CONFIG_ACPI_NUMA
+static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
+{
+	struct acpi_table_header *table_header = NULL;
+	struct acpi_subtable_header *sub_header = NULL;
+	unsigned long table_end, subtable_len;
+	u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
+			pci_dev_id(kdev->pdev);
+	u32 bdf;
+	acpi_status status;
+	struct acpi_srat_cpu_affinity *cpu;
+	struct acpi_srat_generic_affinity *gpu;
+	int pxm = 0, max_pxm = 0;
+	int numa_node = NUMA_NO_NODE;
+	bool found = false;
+
+	/* Fetch the SRAT table from ACPI */
+	status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
+	if (status == AE_NOT_FOUND) {
+		pr_warn("SRAT table not found\n");
+		return;
+	} else if (ACPI_FAILURE(status)) {
+		const char *err = acpi_format_exception(status);
+		pr_err("SRAT table error: %s\n", err);
+		return;
+	}
+
+	table_end = (unsigned long)table_header + table_header->length;
+
+	/* Parse all entries looking for a match. */
+	sub_header = (struct acpi_subtable_header *)
+			((unsigned long)table_header +
+			sizeof(struct acpi_table_srat));
+	subtable_len = sub_header->length;
+
+	while (((unsigned long)sub_header) + subtable_len  < table_end) {
+		/*
+		 * If length is 0, break from this loop to avoid
+		 * infinite loop.
+		 */
+		if (subtable_len == 0) {
+			pr_err("SRAT invalid zero length\n");
+			break;
+		}
+
+		switch (sub_header->type) {
+		case ACPI_SRAT_TYPE_CPU_AFFINITY:
+			cpu = (struct acpi_srat_cpu_affinity *)sub_header;
+			pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
+					cpu->proximity_domain_lo;
+			if (pxm > max_pxm)
+				max_pxm = pxm;
+			break;
+		case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
+			gpu = (struct acpi_srat_generic_affinity *)sub_header;
+			bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
+					*((u16 *)(&gpu->device_handle[2]));
+			if (bdf == pci_id) {
+				found = true;
+				numa_node = pxm_to_node(gpu->proximity_domain);
+			}
+			break;
+		default:
+			break;
+		}
+
+		if (found)
+			break;
+
+		sub_header = (struct acpi_subtable_header *)
+				((unsigned long)sub_header + subtable_len);
+		subtable_len = sub_header->length;
+	}
+
+	acpi_put_table(table_header);
+
+	/* Workaround bad cpu-gpu binding case */
+	if (found && (numa_node < 0 || numa_node > max_pxm))
+		numa_node = 0;
+
+	if (numa_node != NUMA_NO_NODE)
+		set_dev_node(&kdev->pdev->dev, numa_node);
+}
+#endif
+
 /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
  * to its NUMA node
  *	@avail_size: Available size in the memory
@@ -1804,10 +1889,16 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
 	}
 
 	sub_type_hdr->proximity_domain_from = proximity_domain;
-#ifdef CONFIG_NUMA
+
+#ifdef CONFIG_ACPI_NUMA
 	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
+		kfd_find_numa_node_in_srat(kdev);
+#endif
+#ifdef CONFIG_NUMA
+	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) {
 		sub_type_hdr->proximity_domain_to = 0;
-	else
+		set_dev_node(&kdev->pdev->dev, 0);
+	} else
 		sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node;
 #else
 	sub_type_hdr->proximity_domain_to = 0;
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2021-05-05 20:27 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-28 15:11 [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology Eric Huang
2021-04-30 14:06 ` Eric Huang
2021-04-30 23:42 ` Felix Kuehling
2021-05-03 13:52   ` Eric Huang
2021-05-03 18:43 ` Zeng, Oak
2021-05-03 19:13   ` Eric Huang
2021-05-04  2:17     ` Zeng, Oak
2021-05-04  7:46 ` Lazar, Lijo
2021-05-04 14:00   ` Eric Huang
2021-05-04 14:30     ` Lazar, Lijo
2021-05-04 14:35       ` Eric Huang
2021-05-03 14:47 Eric Huang
2021-05-03 19:13 ` Felix Kuehling
2021-05-03 19:27   ` Eric Huang
2021-05-03 19:34     ` Felix Kuehling
2021-05-05 13:36       ` Eric Huang
2021-05-05 13:51 Eric Huang
2021-05-05 20:27 ` Felix Kuehling

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.