All of lore.kernel.org
 help / color / mirror / Atom feed
From: Felix Kuehling <felix.kuehling@amd.com>
To: Jonathan Kim <jonathan.kim@amd.com>, amd-gfx@lists.freedesktop.org
Cc: Hawking.Zhang@amd.com
Subject: Re: [PATCH 2/4] drm/amdkfd: report num xgmi links between direct peers to the kfd
Date: Fri, 9 Jul 2021 20:43:52 -0400	[thread overview]
Message-ID: <9cef7ee0-a7ad-908f-aafd-b9389e98e7a4@amd.com> (raw)
In-Reply-To: <20210621192348.2775943-2-jonathan.kim@amd.com>


On 2021-06-21 3:23 p.m., Jonathan Kim wrote:
> Since Min/Max bandwidth was never used, it will be repurposed to report the
> number of xgmi links between direct peers to the KFD topology.
>
> Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 15 +++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   | 12 ++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h   |  2 ++
>   drivers/gpu/drm/amd/amdkfd/kfd_crat.c      | 11 +++++++++--
>   drivers/gpu/drm/amd/amdkfd/kfd_crat.h      |  4 ++--
>   6 files changed, 41 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index bfab2f9fdd17..c84989eda8eb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -553,6 +553,21 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s
>   	return  (uint8_t)ret;
>   }
>   
> +uint8_t amdgpu_amdkfd_get_xgmi_num_links(struct kgd_dev *dst, struct kgd_dev *src)
> +{
> +	struct amdgpu_device *peer_adev = (struct amdgpu_device *)src;
> +	struct amdgpu_device *adev = (struct amdgpu_device *)dst;
> +	int ret = amdgpu_xgmi_get_num_links(adev, peer_adev);
> +
> +	if (ret < 0) {
> +		DRM_ERROR("amdgpu: failed to get xgmi num links between node %d and %d. ret = %d\n",
> +			adev->gmc.xgmi.physical_node_id,
> +			peer_adev->gmc.xgmi.physical_node_id, ret);
> +		ret = 0;
> +	}
> +	return  (uint8_t)ret;
> +}
> +
>   uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index fabc68eec36a..20e4bfce62be 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -226,6 +226,7 @@ uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd);
>   uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd);
>   int amdgpu_amdkfd_get_noretry(struct kgd_dev *kgd);
>   uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src);
> +uint8_t amdgpu_amdkfd_get_xgmi_num_links(struct kgd_dev *dst, struct kgd_dev *src);
>   
>   /* Read user wptr from a specified user address space with page fault
>    * disabled. The memory must be pinned and mapped to the hardware when
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> index 8567d5d77346..258cf86b32f6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> @@ -486,6 +486,18 @@ int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
>   	return	-EINVAL;
>   }
>   
> +int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev,
> +		struct amdgpu_device *peer_adev)
> +{
> +	struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
> +	int i;
> +
> +	for (i = 0 ; i < top->num_nodes; ++i)
> +		if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
> +			return top->nodes[i].num_links;
> +	return	-EINVAL;
> +}
> +
>   int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
>   {
>   	struct psp_xgmi_topology_info *top_info;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> index 12969c0830d5..d2189bf7d428 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> @@ -59,6 +59,8 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
>   int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
>   int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
>   		struct amdgpu_device *peer_adev);
> +int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev,
> +		struct amdgpu_device *peer_adev);
>   uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
>   					   uint64_t addr);
>   static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> index c6b02aee4993..75047b77649b 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> @@ -1034,8 +1034,8 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
>   
>   			props->min_latency = iolink->minimum_latency;
>   			props->max_latency = iolink->maximum_latency;
> -			props->min_bandwidth = iolink->minimum_bandwidth_mbs;
> -			props->max_bandwidth = iolink->maximum_bandwidth_mbs;
> +			props->min_bandwidth = iolink->minimum_bandwidth;
> +			props->max_bandwidth = iolink->maximum_bandwidth;
>   			props->rec_transfer_size =
>   					iolink->recommended_transfer_size;
>   
> @@ -1989,6 +1989,8 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
>   		sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
>   		sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
>   		sub_type_hdr->num_hops_xgmi = 1;
> +		sub_type_hdr->minimum_bandwidth = 1;
> +		sub_type_hdr->maximum_bandwidth = 1;
>   	} else {
>   		sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
>   	}
> @@ -2033,6 +2035,11 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
>   	sub_type_hdr->proximity_domain_to = proximity_domain_to;
>   	sub_type_hdr->num_hops_xgmi =
>   		amdgpu_amdkfd_get_xgmi_hops_count(kdev->kgd, peer_kdev->kgd);
> +	sub_type_hdr->maximum_bandwidth =
> +		amdgpu_amdkfd_get_xgmi_num_links(kdev->kgd, peer_kdev->kgd);
> +	sub_type_hdr->minimum_bandwidth =
> +		sub_type_hdr->maximum_bandwidth ? 1 : 0;

Reporting the number of XGMI links directly was not my intention. We 
should use it to calculate the actual bandwidth. It depends on the 
number of links and the clocks. You can use minimum and maximum clock to 
calculate the min and max bandwidth. I don't think the number of 
parallel links changes dynamically, so minimum bandwidth should use the 
same number of links.

The CRAT definition seems to use MB/s as the unit.


> +
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
> index d54ceebd346b..d1f6de5edfb9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
> @@ -271,8 +271,8 @@ struct crat_subtype_iolink {
>   	uint16_t	version_minor;
>   	uint32_t	minimum_latency;
>   	uint32_t	maximum_latency;
> -	uint32_t	minimum_bandwidth_mbs;
> -	uint32_t	maximum_bandwidth_mbs;
> +	uint32_t	minimum_bandwidth;
> +	uint32_t	maximum_bandwidth;

I don't think we should change the CRAT definition.

Regards,
   Felix


>   	uint32_t	recommended_transfer_size;
>   	uint8_t		reserved2[CRAT_IOLINK_RESERVED_LENGTH - 1];
>   	uint8_t		num_hops_xgmi;
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

  reply	other threads:[~2021-07-10  0:43 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-21 19:23 [PATCH 1/4] drm/amdgpu: add psp command to get num xgmi links between direct peers Jonathan Kim
2021-06-21 19:23 ` [PATCH 2/4] drm/amdkfd: report num xgmi links between direct peers to the kfd Jonathan Kim
2021-07-10  0:43   ` Felix Kuehling [this message]
2021-06-21 19:23 ` [PATCH 3/4] drm/amdkfd: report pcie bandwidth as number of lanes Jonathan Kim
2021-06-28 20:44   ` Kim, Jonathan
2021-07-10  0:45   ` Felix Kuehling
2021-06-21 19:23 ` [PATCH 4/4] drm/amdkfd: add direct link flag to link properties Jonathan Kim
2021-07-09 20:28   ` Kasiviswanathan, Harish

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=9cef7ee0-a7ad-908f-aafd-b9389e98e7a4@amd.com \
    --to=felix.kuehling@amd.com \
    --cc=Hawking.Zhang@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=jonathan.kim@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.