From: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
To: <amd-gfx@lists.freedesktop.org>, <dri-devel@lists.freedesktop.org>
Cc: daniel.vetter@ffwll.ch, felix.kuehling@amd.com,
	Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>,
	alexander.deucher@amd.com, airlied@redhat.com,
	christian.koenig@amd.com
Subject: [Patch v4 22/24] drm/amdkfd: CRIU Save Shared Virtual Memory ranges
Date: Wed, 22 Dec 2021 19:37:09 -0500
Message-ID: <20211223003711.13064-23-rajneesh.bhardwaj@amd.com>
In-Reply-To: <20211223003711.13064-1-rajneesh.bhardwaj@amd.com>

During the checkpoint stage, save the shared virtual memory ranges and
their attributes for the target process. A process may contain a number
of svm ranges, and each range may carry a number of attributes. Not all
attributes are applicable to a given prange, but during checkpoint we
store values for the maximum possible number of attribute types.
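
As a rough illustration of the resulting record layout, the sketch below
recomputes the per-range record size implied by the description above:
four common attributes plus one ACCESS attribute per GPU attached to the
process. The struct definitions and names here are simplified stand-ins
for illustration only, not the real kfd_criu_svm_range_priv_data /
kfd_ioctl_svm_attribute uAPI definitions.

  /*
   * Illustrative user-space sketch only -- it mirrors the per-range
   * sizing done by kfd_criu_checkpoint_svm() below.  Struct names and
   * fields are simplified stand-ins, not the real uAPI definitions.
   */
  #include <stdint.h>
  #include <stdio.h>

  struct svm_attr {                /* stand-in for kfd_ioctl_svm_attribute */
      uint32_t type;
      uint32_t value;
  };

  struct svm_record {              /* stand-in for kfd_criu_svm_range_priv_data */
      uint32_t object_type;        /* KFD_CRIU_OBJECT_TYPE_SVM_RANGE */
      uint64_t start_addr;
      uint64_t size;
      struct svm_attr attrs[];     /* nattr_common + num_devices entries */
  };

  int main(void)
  {
      int nattr_common = 4;        /* PREFERRED_LOC, PREFETCH_LOC, SET_FLAGS, GRANULARITY */
      int num_devices = 2;         /* example: two GPUs attached to the process */
      size_t record_size = sizeof(struct svm_record) +
                           (nattr_common + num_devices) * sizeof(struct svm_attr);

      printf("bytes checkpointed per SVM range: %zu\n", record_size);
      return 0;
  }

Because every record is padded out to the full attribute set, all records
checkpointed for a process have the same size, which keeps the restore-side
parsing straightforward.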

Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  4 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c     | 95 ++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h     | 10 +++
 3 files changed, 108 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 1c25d5e9067c..916b8d000317 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -2186,7 +2186,9 @@ static int criu_checkpoint(struct file *filep,
 		if (ret)
 			goto close_bo_fds;
 
-		/* TODO: Dump SVM-Ranges */
+		ret = kfd_criu_checkpoint_svm(p, (uint8_t __user *)args->priv_data, &priv_offset);
+		if (ret)
+			goto close_bo_fds;
 	}
 
 close_bo_fds:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 49e05fb5c898..6d59f1bedcf2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -3478,6 +3478,101 @@ int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
 	return 0;
 }
 
+int kfd_criu_checkpoint_svm(struct kfd_process *p,
+			    uint8_t __user *user_priv_data,
+			    uint64_t *priv_data_offset)
+{
+	struct kfd_criu_svm_range_priv_data *svm_priv = NULL;
+	struct kfd_ioctl_svm_attribute *query_attr = NULL;
+	uint64_t svm_priv_data_size, query_attr_size = 0;
+	int index, nattr_common = 4, ret = 0;
+	struct svm_range_list *svms;
+	int num_devices = p->n_pdds;
+	struct svm_range *prange;
+	struct mm_struct *mm;
+
+	svms = &p->svms;
+	if (!svms)
+		return -EINVAL;
+
+	mm = get_task_mm(p->lead_thread);
+	if (!mm) {
+		pr_err("failed to get mm for the target process\n");
+		return -ESRCH;
+	}
+
+	query_attr_size = sizeof(struct kfd_ioctl_svm_attribute) *
+				(nattr_common + num_devices);
+
+	query_attr = kzalloc(query_attr_size, GFP_KERNEL);
+	if (!query_attr) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	query_attr[0].type = KFD_IOCTL_SVM_ATTR_PREFERRED_LOC;
+	query_attr[1].type = KFD_IOCTL_SVM_ATTR_PREFETCH_LOC;
+	query_attr[2].type = KFD_IOCTL_SVM_ATTR_SET_FLAGS;
+	query_attr[3].type = KFD_IOCTL_SVM_ATTR_GRANULARITY;
+
+	for (index = 0; index < num_devices; index++) {
+		struct kfd_process_device *pdd = p->pdds[index];
+
+		query_attr[index + nattr_common].type =
+			KFD_IOCTL_SVM_ATTR_ACCESS;
+		query_attr[index + nattr_common].value = pdd->user_gpu_id;
+	}
+
+	svm_priv_data_size = sizeof(*svm_priv) + query_attr_size;
+
+	svm_priv = kzalloc(svm_priv_data_size, GFP_KERNEL);
+	if (!svm_priv) {
+		ret = -ENOMEM;
+		goto exit_query;
+	}
+
+	index = 0;
+	list_for_each_entry(prange, &svms->list, list) {
+
+		svm_priv->object_type = KFD_CRIU_OBJECT_TYPE_SVM_RANGE;
+		svm_priv->start_addr = prange->start;
+		svm_priv->size = prange->npages;
+		memcpy(&svm_priv->attrs, query_attr, query_attr_size);
+		pr_debug("CRIU: prange: 0x%p start: 0x%lx\t npages: 0x%llx end: 0x%llx\t size: 0x%llx\n",
+			 prange, prange->start, prange->npages,
+			 prange->start + prange->npages - 1,
+			 prange->npages * PAGE_SIZE);
+
+		ret = svm_range_get_attr(p, mm, svm_priv->start_addr,
+					 svm_priv->size,
+					 (nattr_common + num_devices),
+					 svm_priv->attrs);
+		if (ret) {
+			pr_err("CRIU: failed to obtain range attributes\n");
+			goto exit_priv;
+		}
+
+		if (copy_to_user(user_priv_data + *priv_data_offset,
+				 svm_priv, svm_priv_data_size)) {
+			pr_err("Failed to copy svm priv to user\n");
+			ret = -EFAULT;
+			goto exit_priv;
+		}
+
+		*priv_data_offset += svm_priv_data_size;
+
+	}
+
+
+exit_priv:
+	kfree(svm_priv);
+exit_query:
+	kfree(query_attr);
+exit:
+	mmput(mm);
+	return ret;
+}
+
 int
 svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
 	  uint64_t size, uint32_t nattrs, struct kfd_ioctl_svm_attribute *attrs)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index c8c1251920d4..b00576db5baa 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -188,6 +188,9 @@ void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm,
 			void *owner);
 int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
 		       uint64_t *svm_priv_data_size);
+int kfd_criu_checkpoint_svm(struct kfd_process *p,
+			    uint8_t __user *user_priv_data,
+			    uint64_t *priv_offset);
 struct kfd_process_device *
 svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev);
 void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_struct *mm);
@@ -234,6 +237,13 @@ static inline int svm_range_get_info(struct kfd_process *p,
 	return 0;
 }
 
+static inline int kfd_criu_checkpoint_svm(struct kfd_process *p,
+					  uint8_t __user *user_priv_data,
+					  uint64_t *priv_offset)
+{
+	return 0;
+}
+
 #define KFD_IS_SVM_API_SUPPORTED(dev) false
 
 #endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */
-- 
2.17.1

