All of lore.kernel.org
 help / color / mirror / Atom feed
From: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
To: <amd-gfx@lists.freedesktop.org>, <dri-devel@lists.freedesktop.org>
Cc: daniel.vetter@ffwll.ch, felix.kuehling@amd.com,
	Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>,
	David Yat Sin <david.yatsin@amd.com>,
	alexander.deucher@amd.com, airlied@redhat.com,
	christian.koenig@amd.com
Subject: [Patch v4 17/24] drm/amdkfd: CRIU export BOs as prime dmabuf objects
Date: Wed, 22 Dec 2021 19:37:04 -0500	[thread overview]
Message-ID: <20211223003711.13064-18-rajneesh.bhardwaj@amd.com> (raw)
In-Reply-To: <20211223003711.13064-1-rajneesh.bhardwaj@amd.com>

KFD buffer objects do not associate a GEM handle with them so cannot
directly be used with libdrm to initiate a system dma (sDMA) operation
to speedup the checkpoint and restore operation so export them as dmabuf
objects and use with libdrm helper (amdgpu_bo_import) to further process
the sdma command submissions.

With sDMA, we see huge improvement in checkpoint and restore operations
compared to the generic pci based access via host data path.

Suggested-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Signed-off-by: David Yat Sin <david.yatsin@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 71 +++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 20652d488cde..178b0ccfb286 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -35,6 +35,7 @@
 #include <linux/mman.h>
 #include <linux/ptrace.h>
 #include <linux/dma-buf.h>
+#include <linux/fdtable.h>
 #include <asm/processor.h>
 #include "kfd_priv.h"
 #include "kfd_device_queue_manager.h"
@@ -43,6 +44,7 @@
 #include "amdgpu_amdkfd.h"
 #include "kfd_smi_events.h"
 #include "amdgpu_object.h"
+#include "amdgpu_dma_buf.h"
 
 static long kfd_ioctl(struct file *, unsigned int, unsigned long);
 static int kfd_open(struct inode *, struct file *);
@@ -1932,6 +1934,33 @@ uint64_t get_process_num_bos(struct kfd_process *p)
 	return num_of_bos;
 }
 
+static int criu_get_prime_handle(struct drm_gem_object *gobj, int flags,
+				      u32 *shared_fd)
+{
+	struct dma_buf *dmabuf;
+	int ret;
+
+	dmabuf = amdgpu_gem_prime_export(gobj, flags);
+	if (IS_ERR(dmabuf)) {
+		ret = PTR_ERR(dmabuf);
+		pr_err("dmabuf export failed for the BO\n");
+		return ret;
+	}
+
+	ret = dma_buf_fd(dmabuf, flags);
+	if (ret < 0) {
+		pr_err("dmabuf create fd failed, ret:%d\n", ret);
+		goto out_free_dmabuf;
+	}
+
+	*shared_fd = ret;
+	return 0;
+
+out_free_dmabuf:
+	dma_buf_put(dmabuf);
+	return ret;
+}
+
 static int criu_checkpoint_bos(struct kfd_process *p,
 			       uint32_t num_bos,
 			       uint8_t __user *user_bos,
@@ -1992,6 +2021,14 @@ static int criu_checkpoint_bos(struct kfd_process *p,
 					goto exit;
 				}
 			}
+			if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+				ret = criu_get_prime_handle(&dumper_bo->tbo.base,
+						bo_bucket->alloc_flags &
+						KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0,
+						&bo_bucket->dmabuf_fd);
+				if (ret)
+					goto exit;
+			}
 			if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)
 				bo_bucket->offset = KFD_MMAP_TYPE_DOORBELL |
 					KFD_MMAP_GPU_ID(pdd->dev->id);
@@ -2031,6 +2068,10 @@ static int criu_checkpoint_bos(struct kfd_process *p,
 	*priv_offset += num_bos * sizeof(*bo_privs);
 
 exit:
+	while (ret && bo_index--) {
+		if (bo_buckets[bo_index].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
+			close_fd(bo_buckets[bo_index].dmabuf_fd);
+	}
 
 	kvfree(bo_buckets);
 	kvfree(bo_privs);
@@ -2131,16 +2172,28 @@ static int criu_checkpoint(struct file *filep,
 		ret = kfd_criu_checkpoint_queues(p, (uint8_t __user *)args->priv_data,
 						 &priv_offset);
 		if (ret)
-			goto exit_unlock;
+			goto close_bo_fds;
 
 		ret = kfd_criu_checkpoint_events(p, (uint8_t __user *)args->priv_data,
 						 &priv_offset);
 		if (ret)
-			goto exit_unlock;
+			goto close_bo_fds;
 
 		/* TODO: Dump SVM-Ranges */
 	}
 
+close_bo_fds:
+	if (ret) {
+		/* If IOCTL returns err, user assumes all FDs opened in criu_dump_bos are closed */
+		uint32_t i;
+		struct kfd_criu_bo_bucket *bo_buckets = (struct kfd_criu_bo_bucket *) args->bos;
+
+		for (i = 0; i < num_bos; i++) {
+			if (bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
+				close_fd(bo_buckets[i].dmabuf_fd);
+		}
+	}
+
 exit_unlock:
 	mutex_unlock(&p->mutex);
 	if (ret)
@@ -2335,6 +2388,7 @@ static int criu_restore_bos(struct kfd_process *p,
 		struct kfd_criu_bo_priv_data *bo_priv;
 		struct kfd_dev *dev;
 		struct kfd_process_device *pdd;
+		struct kgd_mem *kgd_mem;
 		void *mem;
 		u64 offset;
 		int idr_handle;
@@ -2479,6 +2533,15 @@ static int criu_restore_bos(struct kfd_process *p,
 		}
 
 		pr_debug("map memory was successful for the BO\n");
+		/* create the dmabuf object and export the bo */
+		kgd_mem = (struct kgd_mem *)mem;
+		if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+			ret = criu_get_prime_handle(&kgd_mem->bo->tbo.base,
+						    DRM_RDWR,
+						    &bo_bucket->dmabuf_fd);
+			if (ret)
+				goto exit;
+		}
 	} /* done */
 
 	if (flush_tlbs) {
@@ -2506,6 +2569,10 @@ static int criu_restore_bos(struct kfd_process *p,
 		ret = -EFAULT;
 
 exit:
+	while (ret && i--) {
+		if (bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
+			close_fd(bo_buckets[i].dmabuf_fd);
+	}
 	kvfree(bo_buckets);
 	kvfree(bo_privs);
 	return ret;
-- 
2.17.1


  parent reply	other threads:[~2021-12-23  0:38 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-12-23  0:36 [Patch v4 00/24] CHECKPOINT RESTORE WITH ROCm Rajneesh Bhardwaj
2021-12-23  0:36 ` [Patch v4 01/24] x86/configs: CRIU update debug rock defconfig Rajneesh Bhardwaj
2021-12-23  0:36 ` [Patch v4 02/24] x86/configs: Add rock-rel_defconfig for amd-feature-criu branch Rajneesh Bhardwaj
2021-12-23  0:36 ` [Patch v4 03/24] drm/amdkfd: CRIU Introduce Checkpoint-Restore APIs Rajneesh Bhardwaj
2022-01-10 22:08   ` Felix Kuehling
2021-12-23  0:36 ` [Patch v4 04/24] drm/amdkfd: CRIU Implement KFD process_info ioctl Rajneesh Bhardwaj
2022-01-10 22:47   ` Felix Kuehling
2021-12-23  0:36 ` [Patch v4 05/24] drm/amdkfd: CRIU Implement KFD checkpoint ioctl Rajneesh Bhardwaj
2021-12-23  0:36 ` [Patch v4 06/24] drm/amdkfd: CRIU Implement KFD restore ioctl Rajneesh Bhardwaj
2022-01-10 23:01   ` Felix Kuehling
2021-12-23  0:36 ` [Patch v4 07/24] drm/amdkfd: CRIU Implement KFD resume ioctl Rajneesh Bhardwaj
2022-01-10 23:16   ` Felix Kuehling
2021-12-23  0:36 ` [Patch v4 08/24] drm/amdkfd: CRIU Implement KFD unpause operation Rajneesh Bhardwaj
2021-12-23  0:36 ` [Patch v4 09/24] drm/amdkfd: CRIU add queues support Rajneesh Bhardwaj
2021-12-23  0:36 ` [Patch v4 10/24] drm/amdkfd: CRIU restore queue ids Rajneesh Bhardwaj
2021-12-23  0:36 ` [Patch v4 11/24] drm/amdkfd: CRIU restore sdma id for queues Rajneesh Bhardwaj
2021-12-23  0:36 ` [Patch v4 12/24] drm/amdkfd: CRIU restore queue doorbell id Rajneesh Bhardwaj
2021-12-23  0:37 ` [Patch v4 13/24] drm/amdkfd: CRIU checkpoint and restore queue mqds Rajneesh Bhardwaj
2022-01-10 23:32   ` Felix Kuehling
2021-12-23  0:37 ` [Patch v4 14/24] drm/amdkfd: CRIU checkpoint and restore queue control stack Rajneesh Bhardwaj
2021-12-23  0:37 ` [Patch v4 15/24] drm/amdkfd: CRIU checkpoint and restore events Rajneesh Bhardwaj
2021-12-23  0:37 ` [Patch v4 16/24] drm/amdkfd: CRIU implement gpu_id remapping Rajneesh Bhardwaj
2021-12-23  0:37 ` Rajneesh Bhardwaj [this message]
2021-12-23  0:37 ` [Patch v4 18/24] drm/amdkfd: CRIU checkpoint and restore xnack mode Rajneesh Bhardwaj
2022-01-05 15:22   ` philip yang
2022-01-11  0:10     ` Felix Kuehling
2022-01-11 15:49       ` philip yang
2021-12-23  0:37 ` [Patch v4 19/24] drm/amdkfd: CRIU allow external mm for svm ranges Rajneesh Bhardwaj
2021-12-23  0:37 ` [Patch v4 20/24] drm/amdkfd: use user_gpu_id " Rajneesh Bhardwaj
2021-12-23  0:37 ` [Patch v4 21/24] drm/amdkfd: CRIU Discover " Rajneesh Bhardwaj
2022-01-05 14:48   ` philip yang
2022-01-10 23:11   ` philip yang
2021-12-23  0:37 ` [Patch v4 22/24] drm/amdkfd: CRIU Save Shared Virtual Memory ranges Rajneesh Bhardwaj
2021-12-23  0:37 ` [Patch v4 23/24] drm/amdkfd: CRIU prepare for svm resume Rajneesh Bhardwaj
2022-01-05 14:43   ` philip yang
2022-01-10 23:58     ` Felix Kuehling
2022-01-11 15:58       ` philip yang
2021-12-23  0:37 ` [Patch v4 24/24] drm/amdkfd: CRIU resume shared virtual memory ranges Rajneesh Bhardwaj
2022-01-11  0:03   ` Felix Kuehling

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211223003711.13064-18-rajneesh.bhardwaj@amd.com \
    --to=rajneesh.bhardwaj@amd.com \
    --cc=airlied@redhat.com \
    --cc=alexander.deucher@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=christian.koenig@amd.com \
    --cc=daniel.vetter@ffwll.ch \
    --cc=david.yatsin@amd.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=felix.kuehling@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.