From: Eric Huang <jinhuieric.huang@amd.com>
To: "Zeng, Oak" <Oak.Zeng@amd.com>,
"amd-gfx@lists.freedesktop.org" <amd-gfx@lists.freedesktop.org>
Cc: "Kuehling, Felix" <Felix.Kuehling@amd.com>
Subject: Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology
Date: Mon, 3 May 2021 15:13:35 -0400
Message-ID: <18989d26-b02d-0935-f976-563ca40e5784@amd.com>
In-Reply-To: <8E4089F8-92B0-4C10-86C1-B0F350E9CEE8@amd.com>
In drivers/acpi/numa/srat.c, the generic CCD parsing covers the mapping
between NUMA nodes and proximity (PXM) domains, building the
pxm_to_node_map and node_to_pxm_map arrays. We currently use the
pxm_to_node() API to look up that mapping.
For GCD parsing, the relation of a GCD to a CCD is AMD-defined; the
generic parsing in srat.c treats each GCD as a new NUMA node, which is
not suitable for our needs.
Regards,
Eric
On 2021-05-03 2:43 p.m., Zeng, Oak wrote:
> I feel such parsing work should be part of the generic ACPI code, so it should be done in drivers/acpi/numa/srat.c (see acpi_table_parse_srat), and the ACPI subsystem should expose APIs for the rest of the drivers to query such NUMA information.
>
> Regards,
> Oak
>
>
>
> On 2021-04-28, 11:12 AM, "amd-gfx on behalf of Eric Huang" <amd-gfx-bounces@lists.freedesktop.org on behalf of JinHuiEric.Huang@amd.com> wrote:
>
> In an NPS4 BIOS we need to find the closest NUMA node when creating
> the topology I/O link between CPU and GPU, if the PCI driver doesn't
> set it.
>
> Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 94 ++++++++++++++++++++++++++-
> 1 file changed, 91 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> index 38d45711675f..57518136c7d7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> @@ -1759,6 +1759,87 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
> return 0;
> }
>
> +#ifdef CONFIG_ACPI
> +static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev,
> + int *numa_node)
> +{
> + struct acpi_table_header *table_header = NULL;
> + struct acpi_subtable_header *sub_header = NULL;
> + unsigned long table_end, subtable_len;
> + u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
> + pci_dev_id(kdev->pdev);
> + u32 bdf;
> + acpi_status status;
> + struct acpi_srat_cpu_affinity *cpu;
> + struct acpi_srat_generic_affinity *gpu;
> + int pxm = 0, max_pxm = 0;
> + bool found = false;
> +
> + /* Fetch the SRAT table from ACPI */
> + status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
> + if (status == AE_NOT_FOUND) {
> + pr_warn("SRAT table not found\n");
> + return;
> + } else if (ACPI_FAILURE(status)) {
> + const char *err = acpi_format_exception(status);
> + pr_err("SRAT table error: %s\n", err);
> + return;
> + }
> +
> + table_end = (unsigned long)table_header + table_header->length;
> +
> + /* Parse all entries looking for a match. */
> +
> + sub_header = (struct acpi_subtable_header *)
> + ((unsigned long)table_header +
> + sizeof(struct acpi_table_srat));
> + subtable_len = sub_header->length;
> +
> + while (((unsigned long)sub_header) + subtable_len < table_end) {
> + /*
> + * If length is 0, break from this loop to avoid
> + * infinite loop.
> + */
> + if (subtable_len == 0) {
> + pr_err("SRAT invalid zero length\n");
> + break;
> + }
> +
> + switch (sub_header->type) {
> + case ACPI_SRAT_TYPE_CPU_AFFINITY:
> + cpu = (struct acpi_srat_cpu_affinity *)sub_header;
> + pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
> + cpu->proximity_domain_lo;
> + if (pxm > max_pxm)
> + max_pxm = pxm;
> + break;
> + case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
> + gpu = (struct acpi_srat_generic_affinity *)sub_header;
> + bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
> + *((u16 *)(&gpu->device_handle[2]));
> + if (bdf == pci_id) {
> + found = true;
> + *numa_node = pxm_to_node(gpu->proximity_domain);
> + }
> + break;
> + default:
> + break;
> + }
> +
> + if (found)
> + break;
> +
> + sub_header = (struct acpi_subtable_header *)
> + ((unsigned long)sub_header + subtable_len);
> + subtable_len = sub_header->length;
> + }
> +
> + /* workaround bad cpu-gpu binding case */
> + if (found && (*numa_node < 0 || *numa_node > max_pxm))
> + *numa_node = 0;
> +}
> +#endif
> +
> /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
> * to its NUMA node
> * @avail_size: Available size in the memory
> @@ -1774,6 +1855,9 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
> uint32_t proximity_domain)
> {
> struct amdgpu_device *adev = (struct amdgpu_device *)kdev->kgd;
> +#ifdef CONFIG_NUMA
> + int numa_node = 0;
> +#endif
>
> *avail_size -= sizeof(struct crat_subtype_iolink);
> if (*avail_size < 0)
> @@ -1805,9 +1889,13 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
>
> sub_type_hdr->proximity_domain_from = proximity_domain;
> #ifdef CONFIG_NUMA
> - if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
> - sub_type_hdr->proximity_domain_to = 0;
> - else
> + if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) {
> +#ifdef CONFIG_ACPI
> + kfd_find_numa_node_in_srat(kdev, &numa_node);
> +#endif
> + sub_type_hdr->proximity_domain_to = numa_node;
> + set_dev_node(&kdev->pdev->dev, numa_node);
> + } else
> sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node;
> #else
> sub_type_hdr->proximity_domain_to = 0;
> --
> 2.17.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
Thread overview: 18+ messages
2021-04-28 15:11 [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology Eric Huang
2021-04-30 14:06 ` Eric Huang
2021-04-30 23:42 ` Felix Kuehling
2021-05-03 13:52 ` Eric Huang
2021-05-03 18:43 ` Zeng, Oak
2021-05-03 19:13 ` Eric Huang [this message]
2021-05-04 2:17 ` Zeng, Oak
2021-05-04 7:46 ` Lazar, Lijo
2021-05-04 14:00 ` Eric Huang
2021-05-04 14:30 ` Lazar, Lijo
2021-05-04 14:35 ` Eric Huang
2021-05-03 14:47 Eric Huang
2021-05-03 19:13 ` Felix Kuehling
2021-05-03 19:27 ` Eric Huang
2021-05-03 19:34 ` Felix Kuehling
2021-05-05 13:36 ` Eric Huang
2021-05-05 13:51 Eric Huang
2021-05-05 20:27 ` Felix Kuehling