* [PATCH v3] drm/amdkfd: Add available memory ioctl
@ 2022-06-09 23:32 David Yat Sin
2022-06-10 22:31 ` Felix Kuehling
0 siblings, 1 reply; 2+ messages in thread
From: David Yat Sin @ 2022-06-09 23:32 UTC (permalink / raw)
To: amd-gfx; +Cc: Daniel.Phillips, Felix.Kuehling
From: Daniel Phillips <Daniel.Phillips@amd.com>
Add a new KFD ioctl to return the largest possible memory size that
can be allocated as a buffer object using
kfd_ioctl_alloc_memory_of_gpu. It attempts to use exactly the same
accept/reject criteria as that function so that allocating a new
buffer object of the size returned by this new ioctl is guaranteed to
succeed, barring races with other allocating tasks.
This IOCTL will be used by libhsakmt:
https://www.mail-archive.com/amd-gfx@lists.freedesktop.org/msg75743.html
Signed-off-by: Daniel Phillips <Daniel.Phillips@amd.com>
Signed-off-by: David Yat Sin <David.YatSin@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 +
.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 38 +++++++++++++++++--
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 34 +++++++++++++++++
include/uapi/linux/kfd_ioctl.h | 14 ++++++-
4 files changed, 81 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index ffb2b7d9b9a5..648c031942e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -268,6 +268,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
void *drm_priv);
uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
+size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev);
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct amdgpu_device *adev, uint64_t va, uint64_t size,
void *drm_priv, struct kgd_mem **mem,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index a1de900ba677..afd6e6923189 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -38,6 +38,12 @@
*/
#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
+/*
+ * Align VRAM allocations to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB
+ * BO chunk
+ */
+#define VRAM_ALLOCATION_ALIGN (1 << 21)
+
/* Impose limit on how much memory KFD can use */
static struct {
uint64_t max_system_mem_limit;
@@ -108,7 +114,7 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
* compromise that should work in most cases without reserving too
* much memory for page tables unnecessarily (factor 16K, >> 14).
*/
-#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)
+#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM)
static size_t amdgpu_amdkfd_acc_size(uint64_t size)
{
@@ -148,7 +154,13 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
system_mem_needed = acc_size;
ttm_mem_needed = acc_size;
- vram_needed = size;
+
+ /*
+ * Conservatively round up the allocation requirement to 2 MB
+ * to avoid fragmentation caused by 4K allocations in the tail
+ * 2M BO chunk.
+ */
+ vram_needed = ALIGN(size, VRAM_ALLOCATION_ALIGN);
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
system_mem_needed = acc_size + size;
ttm_mem_needed = acc_size;
@@ -173,7 +185,9 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
kfd_mem_limit.max_ttm_mem_limit) ||
(adev->kfd.vram_used + vram_needed >
- adev->gmc.real_vram_size - reserved_for_pt)) {
+ adev->gmc.real_vram_size -
+ atomic64_read(&adev->vram_pin_size) -
+ reserved_for_pt)) {
ret = -ENOMEM;
goto release;
}
@@ -205,7 +219,7 @@ static void unreserve_mem_limit(struct amdgpu_device *adev,
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
kfd_mem_limit.system_mem_used -= acc_size;
kfd_mem_limit.ttm_mem_used -= acc_size;
- adev->kfd.vram_used -= size;
+ adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN);
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
kfd_mem_limit.system_mem_used -= (acc_size + size);
kfd_mem_limit.ttm_mem_used -= acc_size;
@@ -1668,6 +1682,22 @@ int amdgpu_amdkfd_criu_resume(void *p)
return ret;
}
+/**
+ * amdgpu_amdkfd_get_available_memory - estimate VRAM still allocatable by KFD
+ * @adev: device whose VRAM accounting is queried
+ *
+ * Subtracts KFD's accounted VRAM usage, the currently pinned VRAM and the
+ * page table reservation estimate from the real VRAM size. These are the
+ * same terms that appear in the VRAM limit check of
+ * amdgpu_amdkfd_reserve_mem_limit().
+ *
+ * Return: available bytes rounded down to VRAM_ALLOCATION_ALIGN, or 0 when
+ * the accounted usage already meets or exceeds the real VRAM size.
+ */
+size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev)
+{
+	uint64_t reserved_for_pt =
+		ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
+	/* Signed so that an over-committed device shows up as negative. */
+	int64_t available;
+
+	spin_lock(&kfd_mem_limit.mem_limit_lock);
+	available = adev->gmc.real_vram_size
+		- adev->kfd.vram_used
+		- atomic64_read(&adev->vram_pin_size)
+		- reserved_for_pt;
+	spin_unlock(&kfd_mem_limit.mem_limit_lock);
+
+	/*
+	 * Nothing bounds used + pinned + reserved by the VRAM size. With an
+	 * unsigned result the subtraction would wrap and report a huge bogus
+	 * amount, so clamp to "nothing available" instead.
+	 */
+	if (available < 0)
+		available = 0;
+
+	return ALIGN_DOWN(available, VRAM_ALLOCATION_ALIGN);
+}
+
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct amdgpu_device *adev, uint64_t va, uint64_t size,
void *drm_priv, struct kgd_mem **mem,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 95fa7a9718bb..625e837f0119 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -65,6 +65,25 @@ static int kfd_char_dev_major = -1;
static struct class *kfd_class;
struct device *kfd_device;
+/*
+ * Take p->mutex and look up the per-device data for @gpu_id.
+ *
+ * On a hit the pdd is returned with p->mutex still held; the caller drops
+ * it with kfd_unlock_pdd(). On a miss the mutex is released here and NULL
+ * is returned.
+ */
+static inline struct kfd_process_device *kfd_lock_pdd_by_id(struct kfd_process *p, __u32 gpu_id)
+{
+	struct kfd_process_device *found;
+
+	mutex_lock(&p->mutex);
+	found = kfd_process_device_data_by_id(p, gpu_id);
+
+	/* Only keep the lock when there is a pdd to hand back. */
+	if (!found)
+		mutex_unlock(&p->mutex);
+
+	return found;
+}
+
+/*
+ * Release the mutex of the process that owns @pdd. Counterpart of
+ * kfd_lock_pdd_by_id(); presumably pdd->process is the process that was
+ * locked there.
+ */
+static inline void kfd_unlock_pdd(struct kfd_process_device *pdd)
+{
+	struct kfd_process *owner = pdd->process;
+
+	mutex_unlock(&owner->mutex);
+}
+
int kfd_chardev_init(void)
{
int err = 0;
@@ -958,6 +977,19 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev)
return false;
}
+/*
+ * ioctl handler: report the VRAM still available on the GPU named by
+ * args->gpu_id.
+ *
+ * @data points to a struct kfd_ioctl_get_available_memory_args; gpu_id is
+ * read from it and available is written back.
+ *
+ * Returns 0 on success, -EINVAL when the process has no device data for
+ * the requested gpu_id.
+ */
+static int kfd_ioctl_get_available_memory(struct file *filep,
+					  struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_get_available_memory_args *req = data;
+	struct kfd_process_device *pdd;
+
+	/* On success this returns with the process mutex held. */
+	pdd = kfd_lock_pdd_by_id(p, req->gpu_id);
+	if (!pdd)
+		return -EINVAL;
+
+	req->available = amdgpu_amdkfd_get_available_memory(pdd->dev->adev);
+	kfd_unlock_pdd(pdd);
+
+	return 0;
+}
+
static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
struct kfd_process *p, void *data)
{
@@ -2642,6 +2674,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_OP,
kfd_ioctl_criu, KFD_IOC_FLAG_CHECKPOINT_RESTORE),
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY,
+ kfd_ioctl_get_available_memory, 0),
};
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index eb9ff85f8556..c648ed7c5ff1 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -34,9 +34,10 @@
* - 1.6 - Query clear flags in SVM get_attr API
* - 1.7 - Checkpoint Restore (CRIU) API
* - 1.8 - CRIU - Support for SDMA transfers with GTT BOs
+ * - 1.9 - Add available memory ioctl
*/
#define KFD_IOCTL_MAJOR_VERSION 1
-#define KFD_IOCTL_MINOR_VERSION 8
+#define KFD_IOCTL_MINOR_VERSION 9
struct kfd_ioctl_get_version_args {
__u32 major_version; /* from KFD */
@@ -100,6 +101,12 @@ struct kfd_ioctl_get_queue_wave_state_args {
__u32 pad;
};
+/*
+ * Argument block for AMDKFD_IOC_AVAILABLE_MEMORY.
+ *
+ * User space fills in gpu_id; the kernel writes the result to available.
+ * pad is not touched by the handler in this patch (presumably it only
+ * keeps the struct size a multiple of 8 bytes).
+ */
+struct kfd_ioctl_get_available_memory_args {
+ __u64 available; /* from KFD */
+ __u32 gpu_id; /* to KFD */
+ __u32 pad;
+};
+
/* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */
#define KFD_IOC_CACHE_POLICY_COHERENT 0
#define KFD_IOC_CACHE_POLICY_NONCOHERENT 1
@@ -824,7 +831,10 @@ struct kfd_ioctl_set_xnack_mode_args {
#define AMDKFD_IOC_CRIU_OP \
AMDKFD_IOWR(0x22, struct kfd_ioctl_criu_args)
+#define AMDKFD_IOC_AVAILABLE_MEMORY \
+ AMDKFD_IOWR(0x23, struct kfd_ioctl_get_available_memory_args)
+
#define AMDKFD_COMMAND_START 0x01
-#define AMDKFD_COMMAND_END 0x23
+#define AMDKFD_COMMAND_END 0x24
#endif
--
2.30.2
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH v3] drm/amdkfd: Add available memory ioctl
2022-06-09 23:32 [PATCH v3] drm/amdkfd: Add available memory ioctl David Yat Sin
@ 2022-06-10 22:31 ` Felix Kuehling
0 siblings, 0 replies; 2+ messages in thread
From: Felix Kuehling @ 2022-06-10 22:31 UTC (permalink / raw)
To: David Yat Sin, amd-gfx; +Cc: Daniel.Phillips
Am 2022-06-09 um 19:32 schrieb David Yat Sin:
> From: Daniel Phillips <Daniel.Phillips@amd.com>
>
> Add a new KFD ioctl to return the largest possible memory size that
> can be allocated as a buffer object using
> kfd_ioctl_alloc_memory_of_gpu. It attempts to use exactly the same
> accept/reject criteria as that function so that allocating a new
> buffer object of the size returned by this new ioctl is guaranteed to
> succeed, barring races with other allocating tasks.
>
> This IOCTL will be used by libhsakmt:
> https://www.mail-archive.com/amd-gfx@lists.freedesktop.org/msg75743.html
>
> Signed-off-by: Daniel Phillips <Daniel.Phillips@amd.com>
> Signed-off-by: David Yat Sin <David.YatSin@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 +
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 38 +++++++++++++++++--
> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 34 +++++++++++++++++
> include/uapi/linux/kfd_ioctl.h | 14 ++++++-
> 4 files changed, 81 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index ffb2b7d9b9a5..648c031942e9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -268,6 +268,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
> void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
> void *drm_priv);
> uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
> +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev);
> int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
> struct amdgpu_device *adev, uint64_t va, uint64_t size,
> void *drm_priv, struct kgd_mem **mem,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index a1de900ba677..afd6e6923189 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -38,6 +38,12 @@
> */
> #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
>
> +/*
> + * Align VRAM allocations to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB
> + * BO chunk
> + */
> +#define VRAM_ALLOCATION_ALIGN (1 << 21)
> +
> /* Impose limit on how much memory KFD can use */
> static struct {
> uint64_t max_system_mem_limit;
> @@ -108,7 +114,7 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
> * compromise that should work in most cases without reserving too
> * much memory for page tables unnecessarily (factor 16K, >> 14).
> */
> -#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)
> +#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM)
>
> static size_t amdgpu_amdkfd_acc_size(uint64_t size)
> {
> @@ -148,7 +154,13 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
> } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
> system_mem_needed = acc_size;
> ttm_mem_needed = acc_size;
> - vram_needed = size;
> +
> + /*
> + * Conservatively round up the allocation requirement to 2 MB
> + * to avoid fragmentation caused by 4K allocations in the tail
> + * 2M BO chunk.
> + */
> + vram_needed = ALIGN(size, VRAM_ALLOCATION_ALIGN);
> } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
> system_mem_needed = acc_size + size;
> ttm_mem_needed = acc_size;
> @@ -173,7 +185,9 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
> (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
> kfd_mem_limit.max_ttm_mem_limit) ||
> (adev->kfd.vram_used + vram_needed >
> - adev->gmc.real_vram_size - reserved_for_pt)) {
> + adev->gmc.real_vram_size -
> + atomic64_read(&adev->vram_pin_size) -
> + reserved_for_pt)) {
> ret = -ENOMEM;
> goto release;
> }
> @@ -205,7 +219,7 @@ static void unreserve_mem_limit(struct amdgpu_device *adev,
> } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
> kfd_mem_limit.system_mem_used -= acc_size;
> kfd_mem_limit.ttm_mem_used -= acc_size;
> - adev->kfd.vram_used -= size;
> + adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN);
> } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
> kfd_mem_limit.system_mem_used -= (acc_size + size);
> kfd_mem_limit.ttm_mem_used -= acc_size;
> @@ -1668,6 +1682,22 @@ int amdgpu_amdkfd_criu_resume(void *p)
> return ret;
> }
>
> +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev)
> +{
> + uint64_t reserved_for_pt =
> + ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
> + size_t available;
> +
> + spin_lock(&kfd_mem_limit.mem_limit_lock);
> + available = adev->gmc.real_vram_size
> + - adev->kfd.vram_used
> + - atomic64_read(&adev->vram_pin_size)
> + - reserved_for_pt;
> + spin_unlock(&kfd_mem_limit.mem_limit_lock);
> +
> + return ALIGN_DOWN(available, VRAM_ALLOCATION_ALIGN);
> +}
> +
> int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
> struct amdgpu_device *adev, uint64_t va, uint64_t size,
> void *drm_priv, struct kgd_mem **mem,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 95fa7a9718bb..625e837f0119 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -65,6 +65,25 @@ static int kfd_char_dev_major = -1;
> static struct class *kfd_class;
> struct device *kfd_device;
>
> +static inline struct kfd_process_device *kfd_lock_pdd_by_id(struct kfd_process *p, __u32 gpu_id)
> +{
> + struct kfd_process_device *pdd;
> +
> + mutex_lock(&p->mutex);
> + pdd = kfd_process_device_data_by_id(p, gpu_id);
> +
> + if (pdd)
> + return pdd;
> +
> + mutex_unlock(&p->mutex);
> + return NULL;
> +}
> +
> +static inline void kfd_unlock_pdd(struct kfd_process_device *pdd)
> +{
> + mutex_unlock(&pdd->process->mutex);
> +}
> +
> int kfd_chardev_init(void)
> {
> int err = 0;
> @@ -958,6 +977,19 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev)
> return false;
> }
>
> +static int kfd_ioctl_get_available_memory(struct file *filep,
> + struct kfd_process *p, void *data)
> +{
> + struct kfd_ioctl_get_available_memory_args *args = data;
> + struct kfd_process_device *pdd = kfd_lock_pdd_by_id(p, args->gpu_id);
> +
> + if (!pdd)
> + return -EINVAL;
> + args->available = amdgpu_amdkfd_get_available_memory(pdd->dev->adev);
> + kfd_unlock_pdd(pdd);
> + return 0;
> +}
> +
> static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
> struct kfd_process *p, void *data)
> {
> @@ -2642,6 +2674,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
> AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_OP,
> kfd_ioctl_criu, KFD_IOC_FLAG_CHECKPOINT_RESTORE),
>
> + AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY,
> + kfd_ioctl_get_available_memory, 0),
> };
>
> #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
> diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
> index eb9ff85f8556..c648ed7c5ff1 100644
> --- a/include/uapi/linux/kfd_ioctl.h
> +++ b/include/uapi/linux/kfd_ioctl.h
> @@ -34,9 +34,10 @@
> * - 1.6 - Query clear flags in SVM get_attr API
> * - 1.7 - Checkpoint Restore (CRIU) API
> * - 1.8 - CRIU - Support for SDMA transfers with GTT BOs
> + * - 1.9 - Add available memory ioctl
> */
> #define KFD_IOCTL_MAJOR_VERSION 1
> -#define KFD_IOCTL_MINOR_VERSION 8
> +#define KFD_IOCTL_MINOR_VERSION 9
>
> struct kfd_ioctl_get_version_args {
> __u32 major_version; /* from KFD */
> @@ -100,6 +101,12 @@ struct kfd_ioctl_get_queue_wave_state_args {
> __u32 pad;
> };
>
> +struct kfd_ioctl_get_available_memory_args {
> + __u64 available; /* from KFD */
> + __u32 gpu_id; /* to KFD */
> + __u32 pad;
> +};
> +
> /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */
> #define KFD_IOC_CACHE_POLICY_COHERENT 0
> #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1
> @@ -824,7 +831,10 @@ struct kfd_ioctl_set_xnack_mode_args {
> #define AMDKFD_IOC_CRIU_OP \
> AMDKFD_IOWR(0x22, struct kfd_ioctl_criu_args)
>
> +#define AMDKFD_IOC_AVAILABLE_MEMORY \
> + AMDKFD_IOWR(0x23, struct kfd_ioctl_get_available_memory_args)
> +
> #define AMDKFD_COMMAND_START 0x01
> -#define AMDKFD_COMMAND_END 0x23
> +#define AMDKFD_COMMAND_END 0x24
>
> #endif
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2022-06-10 22:31 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-09 23:32 [PATCH v3] drm/amdkfd: Add available memory ioctl David Yat Sin
2022-06-10 22:31 ` Felix Kuehling
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.