All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/6] Bug:211277 fix backport for 5.10 stable
@ 2021-12-09 22:09 James Zhu
  2021-12-09 22:09 ` [PATCH 1/6] drm/amd/amdkfd: adjust dummy functions' placement James Zhu
                   ` (6 more replies)
  0 siblings, 7 replies; 14+ messages in thread
From: James Zhu @ 2021-12-09 22:09 UTC (permalink / raw)
  To: stable; +Cc: jzhums, alexander.deucher, kolAflash

These patches are backports for 5.10 stable.
They are cherry-picked from 5.14 stable.

BugFix: https://bugzilla.kernel.org/show_bug.cgi?id=211277

James Zhu (3):
  drm/amdkfd: separate kfd_iommu_resume from kfd_resume
  drm/amdgpu: add amdgpu_amdkfd_resume_iommu
  drm/amdgpu: move iommu_resume before ip init/resume

Lang Yu (1):
  drm/amd/amdkfd: adjust dummy functions' placement

Yifan Zhang (2):
  drm/amdgpu: init iommu after amdkfd device init
  drm/amdkfd: fix boot failure when iommu is disabled in Picasso.

 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  97 ++------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 145 ++++++++++++++++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   8 ++
 drivers/gpu/drm/amd/amdkfd/kfd_device.c    |  15 ++-
 4 files changed, 155 insertions(+), 110 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 1/6] drm/amd/amdkfd: adjust dummy functions' placement
  2021-12-09 22:09 [PATCH 0/6] Bug:211277 fix backport for 5.10 stable James Zhu
@ 2021-12-09 22:09 ` James Zhu
  2021-12-09 22:09 ` [PATCH 2/6] drm/amdkfd: separate kfd_iommu_resume from kfd_resume James Zhu
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: James Zhu @ 2021-12-09 22:09 UTC (permalink / raw)
  To: stable
  Cc: jzhums, alexander.deucher, kolAflash, Lang Yu, Felix Kuehling, Huang Rui

From: Lang Yu <Lang.Yu@amd.com>

commit cd63989e0e6aa2eb66b461f2bae769e2550e47ac upstream.

Move all the dummy functions in amdgpu_amdkfd.c to
amdgpu_amdkfd.h as inline functions.

Signed-off-by: Lang Yu <Lang.Yu@amd.com>
Suggested-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: James Zhu <James.Zhu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  87 -------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 138 ++++++++++++++++++---
 2 files changed, 119 insertions(+), 106 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 0544460653b9..b23b31dc570e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -47,12 +47,8 @@ int amdgpu_amdkfd_init(void)
 	amdgpu_amdkfd_total_mem_size = si.totalram - si.totalhigh;
 	amdgpu_amdkfd_total_mem_size *= si.mem_unit;
 
-#ifdef CONFIG_HSA_AMD
 	ret = kgd2kfd_init();
 	amdgpu_amdkfd_gpuvm_init_mem_limits();
-#else
-	ret = -ENOENT;
-#endif
 	kfd_initialized = !ret;
 
 	return ret;
@@ -695,86 +691,3 @@ bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
 
 	return adev->have_atomics_support;
 }
-
-#ifndef CONFIG_HSA_AMD
-bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
-{
-	return false;
-}
-
-void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
-{
-}
-
-int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
-{
-	return 0;
-}
-
-void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
-					struct amdgpu_vm *vm)
-{
-}
-
-struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
-{
-	return NULL;
-}
-
-int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm)
-{
-	return 0;
-}
-
-struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev,
-			      unsigned int asic_type, bool vf)
-{
-	return NULL;
-}
-
-bool kgd2kfd_device_init(struct kfd_dev *kfd,
-			 struct drm_device *ddev,
-			 const struct kgd2kfd_shared_resources *gpu_resources)
-{
-	return false;
-}
-
-void kgd2kfd_device_exit(struct kfd_dev *kfd)
-{
-}
-
-void kgd2kfd_exit(void)
-{
-}
-
-void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
-{
-}
-
-int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
-{
-	return 0;
-}
-
-int kgd2kfd_pre_reset(struct kfd_dev *kfd)
-{
-	return 0;
-}
-
-int kgd2kfd_post_reset(struct kfd_dev *kfd)
-{
-	return 0;
-}
-
-void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
-{
-}
-
-void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
-{
-}
-
-void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask)
-{
-}
-#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index ea391ca7f2f1..a81d9cacf9b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -94,11 +94,6 @@ enum kgd_engine_type {
 	KGD_ENGINE_MAX
 };
 
-struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
-						       struct mm_struct *mm);
-bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
-struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
-int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo);
 
 struct amdkfd_process_info {
 	/* List head of all VMs that belong to a KFD process */
@@ -132,8 +127,6 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
 void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
 void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
-
-int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm);
 int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
 				uint32_t vmid, uint64_t gpu_addr,
 				uint32_t *ib_cmd, uint32_t ib_len);
@@ -153,6 +146,38 @@ void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd);
 int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
 					int queue_bit);
 
+struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
+								struct mm_struct *mm);
+#if IS_ENABLED(CONFIG_HSA_AMD)
+bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
+struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
+int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo);
+int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm);
+#else
+static inline
+bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
+{
+	return false;
+}
+
+static inline
+struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
+{
+	return NULL;
+}
+
+static inline
+int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
+{
+	return 0;
+}
+
+static inline
+int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm)
+{
+	return 0;
+}
+#endif
 /* Shared API */
 int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 				void **mem_obj, uint64_t *gpu_addr,
@@ -215,8 +240,6 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
 					struct file *filp, u32 pasid,
 					void **vm, void **process_info,
 					struct dma_fence **ef);
-void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
-				struct amdgpu_vm *vm);
 void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
 void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm);
 uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
@@ -236,23 +259,43 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
 		struct kgd_mem *mem, void **kptr, uint64_t *size);
 int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
 					    struct dma_fence **ef);
-
 int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
 					      struct kfd_vm_fault_info *info);
-
 int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
 				      struct dma_buf *dmabuf,
 				      uint64_t va, void *vm,
 				      struct kgd_mem **mem, uint64_t *size,
 				      uint64_t *mmap_offset);
-
-void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
-void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
-
 int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
 				struct tile_config *config);
+#if IS_ENABLED(CONFIG_HSA_AMD)
+void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
+void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
+				struct amdgpu_vm *vm);
+void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
+#else
+static inline
+void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
+{
+}
 
+static inline
+void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
+					struct amdgpu_vm *vm)
+{
+}
+
+static inline
+void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
+{
+}
+#endif
 /* KGD2KFD callbacks */
+int kgd2kfd_quiesce_mm(struct mm_struct *mm);
+int kgd2kfd_resume_mm(struct mm_struct *mm);
+int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
+						struct dma_fence *fence);
+#if IS_ENABLED(CONFIG_HSA_AMD)
 int kgd2kfd_init(void);
 void kgd2kfd_exit(void);
 struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev,
@@ -266,11 +309,68 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm);
 int kgd2kfd_pre_reset(struct kfd_dev *kfd);
 int kgd2kfd_post_reset(struct kfd_dev *kfd);
 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
-int kgd2kfd_quiesce_mm(struct mm_struct *mm);
-int kgd2kfd_resume_mm(struct mm_struct *mm);
-int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
-					       struct dma_fence *fence);
 void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd);
 void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask);
+#else
+static inline int kgd2kfd_init(void)
+{
+	return -ENOENT;
+}
 
+static inline void kgd2kfd_exit(void)
+{
+}
+
+static inline
+struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev,
+					unsigned int asic_type, bool vf)
+{
+	return NULL;
+}
+
+static inline
+bool kgd2kfd_device_init(struct kfd_dev *kfd, struct drm_device *ddev,
+				const struct kgd2kfd_shared_resources *gpu_resources)
+{
+	return false;
+}
+
+static inline void kgd2kfd_device_exit(struct kfd_dev *kfd)
+{
+}
+
+static inline void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
+{
+}
+
+static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
+{
+	return 0;
+}
+
+static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd)
+{
+	return 0;
+}
+
+static inline int kgd2kfd_post_reset(struct kfd_dev *kfd)
+{
+	return 0;
+}
+
+static inline
+void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
+{
+}
+
+static inline
+void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
+{
+}
+
+static inline
+void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask)
+{
+}
+#endif
 #endif /* AMDGPU_AMDKFD_H_INCLUDED */
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 2/6] drm/amdkfd: separate kfd_iommu_resume from kfd_resume
  2021-12-09 22:09 [PATCH 0/6] Bug:211277 fix backport for 5.10 stable James Zhu
  2021-12-09 22:09 ` [PATCH 1/6] drm/amd/amdkfd: adjust dummy functions' placement James Zhu
@ 2021-12-09 22:09 ` James Zhu
  2021-12-09 22:09 ` [PATCH 3/6] drm/amdgpu: add amdgpu_amdkfd_resume_iommu James Zhu
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: James Zhu @ 2021-12-09 22:09 UTC (permalink / raw)
  To: stable
  Cc: jzhums, alexander.deucher, kolAflash, Felix Kuehling, Greg Kroah-Hartman

commit fefc01f042f44ede373ee66773b8238dd8fdcb55 upstream.

Separate kfd_iommu_resume from kfd_resume for fine-tuning
of amdgpu device init/resume/reset/recovery sequence.

v2: squash in fix for !CONFIG_HSA_AMD

Bug: https://bugzilla.kernel.org/show_bug.cgi?id=211277
Signed-off-by: James Zhu <James.Zhu@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: James Zhu <James.Zhu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  6 ++++++
 drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 12 ++++++++----
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index a81d9cacf9b8..8a402a3df412 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -305,6 +305,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 			 const struct kgd2kfd_shared_resources *gpu_resources);
 void kgd2kfd_device_exit(struct kfd_dev *kfd);
 void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm);
+int kgd2kfd_resume_iommu(struct kfd_dev *kfd);
 int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm);
 int kgd2kfd_pre_reset(struct kfd_dev *kfd);
 int kgd2kfd_post_reset(struct kfd_dev *kfd);
@@ -343,6 +344,11 @@ static inline void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
 {
 }
 
+static int __maybe_unused kgd2kfd_resume_iommu(struct kfd_dev *kfd)
+{
+	return 0;
+}
+
 static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
 {
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 5751bddc9cad..1204dae85797 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -896,17 +896,21 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
 	return ret;
 }
 
-static int kfd_resume(struct kfd_dev *kfd)
+int kgd2kfd_resume_iommu(struct kfd_dev *kfd)
 {
 	int err = 0;
 
 	err = kfd_iommu_resume(kfd);
-	if (err) {
+	if (err)
 		dev_err(kfd_device,
 			"Failed to resume IOMMU for device %x:%x\n",
 			kfd->pdev->vendor, kfd->pdev->device);
-		return err;
-	}
+	return err;
+}
+
+static int kfd_resume(struct kfd_dev *kfd)
+{
+	int err = 0;
 
 	err = kfd->dqm->ops.start(kfd->dqm);
 	if (err) {
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 3/6] drm/amdgpu: add amdgpu_amdkfd_resume_iommu
  2021-12-09 22:09 [PATCH 0/6] Bug:211277 fix backport for 5.10 stable James Zhu
  2021-12-09 22:09 ` [PATCH 1/6] drm/amd/amdkfd: adjust dummy functions' placement James Zhu
  2021-12-09 22:09 ` [PATCH 2/6] drm/amdkfd: separate kfd_iommu_resume from kfd_resume James Zhu
@ 2021-12-09 22:09 ` James Zhu
  2021-12-09 22:09 ` [PATCH 4/6] drm/amdgpu: move iommu_resume before ip init/resume James Zhu
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: James Zhu @ 2021-12-09 22:09 UTC (permalink / raw)
  To: stable
  Cc: jzhums, alexander.deucher, kolAflash, Felix Kuehling, Greg Kroah-Hartman

commit 8066008482e533e91934bee49765bf8b4a7c40db upstream.

Add amdgpu_amdkfd_resume_iommu for amdgpu.

Bug: https://bugzilla.kernel.org/show_bug.cgi?id=211277
Signed-off-by: James Zhu <James.Zhu@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: James Zhu <James.Zhu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 10 ++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  1 +
 2 files changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index b23b31dc570e..fb6230c62daa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -190,6 +190,16 @@ void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm)
 		kgd2kfd_suspend(adev->kfd.dev, run_pm);
 }
 
+int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev)
+{
+	int r = 0;
+
+	if (adev->kfd.dev)
+		r = kgd2kfd_resume_iommu(adev->kfd.dev);
+
+	return r;
+}
+
 int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm)
 {
 	int r = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 8a402a3df412..32e385f287cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -121,6 +121,7 @@ int amdgpu_amdkfd_init(void);
 void amdgpu_amdkfd_fini(void);
 
 void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm);
+int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev);
 int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm);
 void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
 			const void *ih_ring_entry);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 4/6] drm/amdgpu: move iommu_resume before ip init/resume
  2021-12-09 22:09 [PATCH 0/6] Bug:211277 fix backport for 5.10 stable James Zhu
                   ` (2 preceding siblings ...)
  2021-12-09 22:09 ` [PATCH 3/6] drm/amdgpu: add amdgpu_amdkfd_resume_iommu James Zhu
@ 2021-12-09 22:09 ` James Zhu
  2021-12-09 22:09 ` [PATCH 5/6] drm/amdgpu: init iommu after amdkfd device init James Zhu
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: James Zhu @ 2021-12-09 22:09 UTC (permalink / raw)
  To: stable
  Cc: jzhums, alexander.deucher, kolAflash, Felix Kuehling, Greg Kroah-Hartman

commit f02abeb0779700c308e661a412451b38962b8a0b upstream.

Separate iommu_resume from kfd_resume, and move it before
other amdgpu ip init/resume.

Bug: https://bugzilla.kernel.org/show_bug.cgi?id=211277
Signed-off-by: James Zhu <James.Zhu@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: James Zhu <James.Zhu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 97723f2b5ece..2947bded074a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2220,6 +2220,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 	if (r)
 		goto init_failed;
 
+	r = amdgpu_amdkfd_resume_iommu(adev);
+	if (r)
+		goto init_failed;
+
 	r = amdgpu_device_ip_hw_init_phase1(adev);
 	if (r)
 		goto init_failed;
@@ -2913,6 +2917,10 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
 {
 	int r;
 
+	r = amdgpu_amdkfd_resume_iommu(adev);
+	if (r)
+		return r;
+
 	r = amdgpu_device_ip_resume_phase1(adev);
 	if (r)
 		return r;
@@ -4296,6 +4304,10 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
 
 			if (!r) {
 				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
+				r = amdgpu_amdkfd_resume_iommu(tmp_adev);
+				if (r)
+					goto out;
+
 				r = amdgpu_device_ip_resume_phase1(tmp_adev);
 				if (r)
 					goto out;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 5/6] drm/amdgpu: init iommu after amdkfd device init
  2021-12-09 22:09 [PATCH 0/6] Bug:211277 fix backport for 5.10 stable James Zhu
                   ` (3 preceding siblings ...)
  2021-12-09 22:09 ` [PATCH 4/6] drm/amdgpu: move iommu_resume before ip init/resume James Zhu
@ 2021-12-09 22:09 ` James Zhu
  2021-12-09 22:09 ` [PATCH 6/6] drm/amdkfd: fix boot failure when iommu is disabled in Picasso James Zhu
  2021-12-10  6:33 ` [PATCH 0/6] Bug:211277 fix backport for 5.10 stable Greg KH
  6 siblings, 0 replies; 14+ messages in thread
From: James Zhu @ 2021-12-09 22:09 UTC (permalink / raw)
  To: stable
  Cc: jzhums, alexander.deucher, kolAflash, Yifan Zhang,
	Felix Kuehling, Sasha Levin

From: Yifan Zhang <yifan1.zhang@amd.com>

[ Upstream commit 714d9e4574d54596973ee3b0624ee4a16264d700 ]

This patch is to fix clinfo failure in Raven/Picasso:

Number of platforms: 1
  Platform Profile: FULL_PROFILE
  Platform Version: OpenCL 2.2 AMD-APP (3364.0)
  Platform Name: AMD Accelerated Parallel Processing
  Platform Vendor: Advanced Micro Devices, Inc.
  Platform Extensions: cl_khr_icd cl_amd_event_callback

  Platform Name: AMD Accelerated Parallel Processing Number of devices: 0

Signed-off-by: Yifan Zhang <yifan1.zhang@amd.com>
Reviewed-by: James Zhu <James.Zhu@amd.com>
Tested-by: James Zhu <James.Zhu@amd.com>
Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: James Zhu <James.Zhu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 2947bded074a..488e574f5da1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2220,10 +2220,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 	if (r)
 		goto init_failed;
 
-	r = amdgpu_amdkfd_resume_iommu(adev);
-	if (r)
-		goto init_failed;
-
 	r = amdgpu_device_ip_hw_init_phase1(adev);
 	if (r)
 		goto init_failed;
@@ -2259,6 +2255,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 		amdgpu_xgmi_add_device(adev);
 	amdgpu_amdkfd_device_init(adev);
 
+	r = amdgpu_amdkfd_resume_iommu(adev);
+	if (r)
+		goto init_failed;
+
 	amdgpu_fru_get_product_info(adev);
 
 init_failed:
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 6/6] drm/amdkfd: fix boot failure when iommu is disabled in Picasso.
  2021-12-09 22:09 [PATCH 0/6] Bug:211277 fix backport for 5.10 stable James Zhu
                   ` (4 preceding siblings ...)
  2021-12-09 22:09 ` [PATCH 5/6] drm/amdgpu: init iommu after amdkfd device init James Zhu
@ 2021-12-09 22:09 ` James Zhu
  2021-12-10 13:33   ` Greg Kroah-Hartman
  2021-12-10  6:33 ` [PATCH 0/6] Bug:211277 fix backport for 5.10 stable Greg KH
  6 siblings, 1 reply; 14+ messages in thread
From: James Zhu @ 2021-12-09 22:09 UTC (permalink / raw)
  To: stable
  Cc: jzhums, alexander.deucher, kolAflash, Yifan Zhang, youling,
	Greg Kroah-Hartman

From: Yifan Zhang <yifan1.zhang@amd.com>

commit afd18180c07026f94a80ff024acef5f4159084a4 upstream.

When IOMMU is disabled in the SBIOS and KFD is in the iommuv2 path, iommuv2
init will fail. But this failure should not block amdgpu driver init.

Reported-by: youling <youling257@gmail.com>
Tested-by: youling <youling257@gmail.com>
Signed-off-by: Yifan Zhang <yifan1.zhang@amd.com>
Reviewed-by: James Zhu <James.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: James Zhu <James.Zhu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ----
 drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 3 +++
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 488e574f5da1..f262c4e7a48a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2255,10 +2255,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 		amdgpu_xgmi_add_device(adev);
 	amdgpu_amdkfd_device_init(adev);
 
-	r = amdgpu_amdkfd_resume_iommu(adev);
-	if (r)
-		goto init_failed;
-
 	amdgpu_fru_get_product_info(adev);
 
 init_failed:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 1204dae85797..b35f0af71f00 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -751,6 +751,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 
 	kfd_cwsr_init(kfd);
 
+	if (kgd2kfd_resume_iommu(kfd))
+		goto device_iommu_error;
+
 	if (kfd_resume(kfd))
 		goto kfd_resume_error;
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 0/6] Bug:211277 fix backport for 5.10 stable
  2021-12-09 22:09 [PATCH 0/6] Bug:211277 fix backport for 5.10 stable James Zhu
                   ` (5 preceding siblings ...)
  2021-12-09 22:09 ` [PATCH 6/6] drm/amdkfd: fix boot failure when iommu is disabled in Picasso James Zhu
@ 2021-12-10  6:33 ` Greg KH
  6 siblings, 0 replies; 14+ messages in thread
From: Greg KH @ 2021-12-10  6:33 UTC (permalink / raw)
  To: James Zhu; +Cc: stable, jzhums, alexander.deucher, kolAflash

On Thu, Dec 09, 2021 at 05:09:50PM -0500, James Zhu wrote:
> These patches are back port for 5.10 stable.
> They are cherry-picked from 5.14 stable.
> 
> BugFix: https://bugzilla.kernel.org/show_bug.cgi?id=211277
> 
> James Zhu (3):
>   drm/amdkfd: separate kfd_iommu_resume from kfd_resume
>   drm/amdgpu: add amdgpu_amdkfd_resume_iommu
>   drm/amdgpu: move iommu_resume before ip init/resume
> 
> Lang Yu (1):
>   drm/amd/amdkfd: adjust dummy functions' placement
> 
> Yifan Zhang (2):
>   drm/amdgpu: init iommu after amdkfd device init
>   drm/amdkfd: fix boot failure when iommu is disabled in Picasso.

What has changed from the last time this series was submitted?

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 6/6] drm/amdkfd: fix boot failure when iommu is disabled in Picasso.
  2021-12-09 22:09 ` [PATCH 6/6] drm/amdkfd: fix boot failure when iommu is disabled in Picasso James Zhu
@ 2021-12-10 13:33   ` Greg Kroah-Hartman
  2021-12-10 14:14     ` James Zhu
  0 siblings, 1 reply; 14+ messages in thread
From: Greg Kroah-Hartman @ 2021-12-10 13:33 UTC (permalink / raw)
  To: James Zhu
  Cc: stable, jzhums, alexander.deucher, kolAflash, Yifan Zhang, youling

On Thu, Dec 09, 2021 at 05:09:56PM -0500, James Zhu wrote:
> From: Yifan Zhang <yifan1.zhang@amd.com>
> 
> commit afd18180c07026f94a80ff024acef5f4159084a4 upstream.
> 
> When IOMMU disabled in sbios and kfd in iommuv2 path, iommuv2
> init will fail. But this failure should not block amdgpu driver init.
> 
> Reported-by: youling <youling257@gmail.com>
> Tested-by: youling <youling257@gmail.com>
> Signed-off-by: Yifan Zhang <yifan1.zhang@amd.com>
> Reviewed-by: James Zhu <James.Zhu@amd.com>
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> Signed-off-by: James Zhu <James.Zhu@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ----
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 3 +++
>  2 files changed, 3 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 488e574f5da1..f262c4e7a48a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2255,10 +2255,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
>  		amdgpu_xgmi_add_device(adev);
>  	amdgpu_amdkfd_device_init(adev);
>  
> -	r = amdgpu_amdkfd_resume_iommu(adev);
> -	if (r)
> -		goto init_failed;
> -
>  	amdgpu_fru_get_product_info(adev);
>  
>  init_failed:
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 1204dae85797..b35f0af71f00 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -751,6 +751,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
>  
>  	kfd_cwsr_init(kfd);
>  
> +	if (kgd2kfd_resume_iommu(kfd))
> +		goto device_iommu_error;
> +
>  	if (kfd_resume(kfd))
>  		goto kfd_resume_error;
>  
> -- 
> 2.25.1
> 

Like I said last time, do not change the backport unless you HAVE to.
You did it here again for no good reason :(

greg k-h

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 6/6] drm/amdkfd: fix boot failure when iommu is disabled in Picasso.
  2021-12-10 13:33   ` Greg Kroah-Hartman
@ 2021-12-10 14:14     ` James Zhu
  2021-12-10 14:35       ` Greg Kroah-Hartman
  0 siblings, 1 reply; 14+ messages in thread
From: James Zhu @ 2021-12-10 14:14 UTC (permalink / raw)
  To: Greg Kroah-Hartman, James Zhu
  Cc: stable, jzhums, alexander.deucher, kolAflash, Yifan Zhang, youling


On 2021-12-10 8:33 a.m., Greg Kroah-Hartman wrote:
> On Thu, Dec 09, 2021 at 05:09:56PM -0500, James Zhu wrote:
>> From: Yifan Zhang <yifan1.zhang@amd.com>
>>
>> commit afd18180c07026f94a80ff024acef5f4159084a4 upstream.
>>
>> When IOMMU disabled in sbios and kfd in iommuv2 path, iommuv2
>> init will fail. But this failure should not block amdgpu driver init.
>>
>> Reported-by: youling <youling257@gmail.com>
>> Tested-by: youling <youling257@gmail.com>
>> Signed-off-by: Yifan Zhang <yifan1.zhang@amd.com>
>> Reviewed-by: James Zhu <James.Zhu@amd.com>
>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
>> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
>> Signed-off-by: James Zhu <James.Zhu@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ----
>>   drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 3 +++
>>   2 files changed, 3 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index 488e574f5da1..f262c4e7a48a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -2255,10 +2255,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
>>   		amdgpu_xgmi_add_device(adev);
>>   	amdgpu_amdkfd_device_init(adev);
>>   
>> -	r = amdgpu_amdkfd_resume_iommu(adev);
>> -	if (r)
>> -		goto init_failed;
>> -
>>   	amdgpu_fru_get_product_info(adev);
>>   
>>   init_failed:
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> index 1204dae85797..b35f0af71f00 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> @@ -751,6 +751,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
>>   
>>   	kfd_cwsr_init(kfd);
>>   
>> +	if (kgd2kfd_resume_iommu(kfd))
>> +		goto device_iommu_error;
>> +
>>   	if (kfd_resume(kfd))
>>   		goto kfd_resume_error;
>>   
>> -- 
>> 2.25.1
>>
> Like I said last time, do not change the backport unless you HAVE to.
> You did it here again for no good reason :(

[JZ] Yes, I should add more explanation next time.

The backport conflict fix removes the call to
svm_migrate_init((struct amdgpu_device *)kfd->kgd);

The new AMD SVM feature has not been added to 5.10, so it is safe to remove it.

>
> greg k-h

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 6/6] drm/amdkfd: fix boot failure when iommu is disabled in Picasso.
  2021-12-10 14:14     ` James Zhu
@ 2021-12-10 14:35       ` Greg Kroah-Hartman
  2021-12-10 14:46         ` James Zhu
  0 siblings, 1 reply; 14+ messages in thread
From: Greg Kroah-Hartman @ 2021-12-10 14:35 UTC (permalink / raw)
  To: James Zhu
  Cc: James Zhu, stable, jzhums, alexander.deucher, kolAflash,
	Yifan Zhang, youling

On Fri, Dec 10, 2021 at 09:14:30AM -0500, James Zhu wrote:
> 
> On 2021-12-10 8:33 a.m., Greg Kroah-Hartman wrote:
> > On Thu, Dec 09, 2021 at 05:09:56PM -0500, James Zhu wrote:
> > > From: Yifan Zhang <yifan1.zhang@amd.com>
> > > 
> > > commit afd18180c07026f94a80ff024acef5f4159084a4 upstream.
> > > 
> > > When IOMMU disabled in sbios and kfd in iommuv2 path, iommuv2
> > > init will fail. But this failure should not block amdgpu driver init.
> > > 
> > > Reported-by: youling <youling257@gmail.com>
> > > Tested-by: youling <youling257@gmail.com>
> > > Signed-off-by: Yifan Zhang <yifan1.zhang@amd.com>
> > > Reviewed-by: James Zhu <James.Zhu@amd.com>
> > > Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> > > Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> > > Signed-off-by: James Zhu <James.Zhu@amd.com>
> > > ---
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ----
> > >   drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 3 +++
> > >   2 files changed, 3 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > index 488e574f5da1..f262c4e7a48a 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > @@ -2255,10 +2255,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
> > >   		amdgpu_xgmi_add_device(adev);
> > >   	amdgpu_amdkfd_device_init(adev);
> > > -	r = amdgpu_amdkfd_resume_iommu(adev);
> > > -	if (r)
> > > -		goto init_failed;
> > > -
> > >   	amdgpu_fru_get_product_info(adev);
> > >   init_failed:
> > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > > index 1204dae85797..b35f0af71f00 100644
> > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > > @@ -751,6 +751,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
> > >   	kfd_cwsr_init(kfd);
> > > +	if (kgd2kfd_resume_iommu(kfd))
> > > +		goto device_iommu_error;
> > > +
> > >   	if (kfd_resume(kfd))
> > >   		goto kfd_resume_error;
> > > -- 
> > > 2.25.1
> > > 
> > Like I said last time, do not change the backport unless you HAVE to.
> > You did it here again for no good reason :(
> 
> [JZ] Yes, I should add more explanation next time.
> 
> Backport conflict fix to remove  svm_migrate_init((struct amdgpu_device
> *)kfd->kgd);
> 
> new AMD svm feature has not been added for 5.10 So it is safe to remove it.

No, I am talking about the fact that you fixed up a coding style fix in
this backport that is not in the original commit in Linus's tree.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 6/6] drm/amdkfd: fix boot failure when iommu is disabled in Picasso.
  2021-12-10 14:35       ` Greg Kroah-Hartman
@ 2021-12-10 14:46         ` James Zhu
  2021-12-10 15:12           ` Greg Kroah-Hartman
  0 siblings, 1 reply; 14+ messages in thread
From: James Zhu @ 2021-12-10 14:46 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: James Zhu, stable, jzhums, alexander.deucher, kolAflash,
	Yifan Zhang, youling


On 2021-12-10 9:35 a.m., Greg Kroah-Hartman wrote:
> On Fri, Dec 10, 2021 at 09:14:30AM -0500, James Zhu wrote:
>> On 2021-12-10 8:33 a.m., Greg Kroah-Hartman wrote:
>>> On Thu, Dec 09, 2021 at 05:09:56PM -0500, James Zhu wrote:
>>>> From: Yifan Zhang <yifan1.zhang@amd.com>
>>>>
>>>> commit afd18180c07026f94a80ff024acef5f4159084a4 upstream.
>>>>
>>>> When IOMMU disabled in sbios and kfd in iommuv2 path, iommuv2
>>>> init will fail. But this failure should not block amdgpu driver init.
>>>>
>>>> Reported-by: youling <youling257@gmail.com>
>>>> Tested-by: youling <youling257@gmail.com>
>>>> Signed-off-by: Yifan Zhang <yifan1.zhang@amd.com>
>>>> Reviewed-by: James Zhu <James.Zhu@amd.com>
>>>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
>>>> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
>>>> Signed-off-by: James Zhu <James.Zhu@amd.com>
>>>> ---
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ----
>>>>    drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 3 +++
>>>>    2 files changed, 3 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> index 488e574f5da1..f262c4e7a48a 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> @@ -2255,10 +2255,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
>>>>    		amdgpu_xgmi_add_device(adev);
>>>>    	amdgpu_amdkfd_device_init(adev);
>>>> -	r = amdgpu_amdkfd_resume_iommu(adev);
>>>> -	if (r)
>>>> -		goto init_failed;
>>>> -
>>>>    	amdgpu_fru_get_product_info(adev);
>>>>    init_failed:
>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>> index 1204dae85797..b35f0af71f00 100644
>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>> @@ -751,6 +751,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
>>>>    	kfd_cwsr_init(kfd);
>>>> +	if (kgd2kfd_resume_iommu(kfd))
>>>> +		goto device_iommu_error;
>>>> +
>>>>    	if (kfd_resume(kfd))
>>>>    		goto kfd_resume_error;
>>>> -- 
>>>> 2.25.1
>>>>
>>> Like I said last time, do not change the backport unless you HAVE to.
>>> You did it here again for no good reason :(
>> [JZ] Yes, I should add more explanation next time.
>>
>> Backport conflict fix to remove  svm_migrate_init((struct amdgpu_device
>> *)kfd->kgd);
>>
>> new AMD svm feature has not been added for 5.10 So it is safe to remove it.
> No, I am talking about the fact that you fixed up a coding style fix in
> this backport that is not in the original commit in Linus's tree.

[JZ] I see. This fix is not necessary. Do you want me to send a v2 of the
backport with this unnecessary coding style fix dropped?


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 6/6] drm/amdkfd: fix boot failure when iommu is disabled in Picasso.
  2021-12-10 14:46         ` James Zhu
@ 2021-12-10 15:12           ` Greg Kroah-Hartman
  2021-12-10 15:33             ` James Zhu
  0 siblings, 1 reply; 14+ messages in thread
From: Greg Kroah-Hartman @ 2021-12-10 15:12 UTC (permalink / raw)
  To: James Zhu
  Cc: James Zhu, stable, jzhums, alexander.deucher, kolAflash,
	Yifan Zhang, youling

On Fri, Dec 10, 2021 at 09:46:08AM -0500, James Zhu wrote:
> 
> On 2021-12-10 9:35 a.m., Greg Kroah-Hartman wrote:
> > On Fri, Dec 10, 2021 at 09:14:30AM -0500, James Zhu wrote:
> > > On 2021-12-10 8:33 a.m., Greg Kroah-Hartman wrote:
> > > > On Thu, Dec 09, 2021 at 05:09:56PM -0500, James Zhu wrote:
> > > > > From: Yifan Zhang <yifan1.zhang@amd.com>
> > > > > 
> > > > > commit afd18180c07026f94a80ff024acef5f4159084a4 upstream.
> > > > > 
> > > > > When IOMMU disabled in sbios and kfd in iommuv2 path, iommuv2
> > > > > init will fail. But this failure should not block amdgpu driver init.
> > > > > 
> > > > > Reported-by: youling <youling257@gmail.com>
> > > > > Tested-by: youling <youling257@gmail.com>
> > > > > Signed-off-by: Yifan Zhang <yifan1.zhang@amd.com>
> > > > > Reviewed-by: James Zhu <James.Zhu@amd.com>
> > > > > Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> > > > > Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> > > > > Signed-off-by: James Zhu <James.Zhu@amd.com>
> > > > > ---
> > > > >    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ----
> > > > >    drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 3 +++
> > > > >    2 files changed, 3 insertions(+), 4 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > index 488e574f5da1..f262c4e7a48a 100644
> > > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > @@ -2255,10 +2255,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
> > > > >    		amdgpu_xgmi_add_device(adev);
> > > > >    	amdgpu_amdkfd_device_init(adev);
> > > > > -	r = amdgpu_amdkfd_resume_iommu(adev);
> > > > > -	if (r)
> > > > > -		goto init_failed;
> > > > > -
> > > > >    	amdgpu_fru_get_product_info(adev);
> > > > >    init_failed:
> > > > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > > > > index 1204dae85797..b35f0af71f00 100644
> > > > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > > > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > > > > @@ -751,6 +751,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
> > > > >    	kfd_cwsr_init(kfd);
> > > > > +	if (kgd2kfd_resume_iommu(kfd))
> > > > > +		goto device_iommu_error;
> > > > > +
> > > > >    	if (kfd_resume(kfd))
> > > > >    		goto kfd_resume_error;
> > > > > -- 
> > > > > 2.25.1
> > > > > 
> > > > Like I said last time, do not change the backport unless you HAVE to.
> > > > You did it here again for no good reason :(
> > > [JZ] Yes, I should add more explanation next time.
> > > 
> > > Backport conflict fix to remove  svm_migrate_init((struct amdgpu_device
> > > *)kfd->kgd);
> > > 
> > > new AMD svm feature has not been added for 5.10 So it is safe to remove it.
> > No, I am talking about the fact that you fixed up a coding style fix in
> > this backport that is not in the original commit in Linus's tree.
> 
> [JZ] I see. this fix is not necessary. Do you want me to send v2 with
> 
> this unnecessary coding style fix dropping for backport?
> 

I took what was in Linus's tree already.  Please verify that what I
applied to the queue still works.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 6/6] drm/amdkfd: fix boot failure when iommu is disabled in Picasso.
  2021-12-10 15:12           ` Greg Kroah-Hartman
@ 2021-12-10 15:33             ` James Zhu
  0 siblings, 0 replies; 14+ messages in thread
From: James Zhu @ 2021-12-10 15:33 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: James Zhu, stable, jzhums, alexander.deucher, kolAflash,
	Yifan Zhang, youling


On 2021-12-10 10:12 a.m., Greg Kroah-Hartman wrote:
> On Fri, Dec 10, 2021 at 09:46:08AM -0500, James Zhu wrote:
>> On 2021-12-10 9:35 a.m., Greg Kroah-Hartman wrote:
>>> On Fri, Dec 10, 2021 at 09:14:30AM -0500, James Zhu wrote:
>>>> On 2021-12-10 8:33 a.m., Greg Kroah-Hartman wrote:
>>>>> On Thu, Dec 09, 2021 at 05:09:56PM -0500, James Zhu wrote:
>>>>>> From: Yifan Zhang <yifan1.zhang@amd.com>
>>>>>>
>>>>>> commit afd18180c07026f94a80ff024acef5f4159084a4 upstream.
>>>>>>
>>>>>> When IOMMU disabled in sbios and kfd in iommuv2 path, iommuv2
>>>>>> init will fail. But this failure should not block amdgpu driver init.
>>>>>>
>>>>>> Reported-by: youling <youling257@gmail.com>
>>>>>> Tested-by: youling <youling257@gmail.com>
>>>>>> Signed-off-by: Yifan Zhang <yifan1.zhang@amd.com>
>>>>>> Reviewed-by: James Zhu <James.Zhu@amd.com>
>>>>>> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
>>>>>> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
>>>>>> Signed-off-by: James Zhu <James.Zhu@amd.com>
>>>>>> ---
>>>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ----
>>>>>>     drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 3 +++
>>>>>>     2 files changed, 3 insertions(+), 4 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>> index 488e574f5da1..f262c4e7a48a 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>> @@ -2255,10 +2255,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
>>>>>>     		amdgpu_xgmi_add_device(adev);
>>>>>>     	amdgpu_amdkfd_device_init(adev);
>>>>>> -	r = amdgpu_amdkfd_resume_iommu(adev);
>>>>>> -	if (r)
>>>>>> -		goto init_failed;
>>>>>> -
>>>>>>     	amdgpu_fru_get_product_info(adev);
>>>>>>     init_failed:
>>>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>>>> index 1204dae85797..b35f0af71f00 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>>>>> @@ -751,6 +751,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
>>>>>>     	kfd_cwsr_init(kfd);
>>>>>> +	if (kgd2kfd_resume_iommu(kfd))
>>>>>> +		goto device_iommu_error;
>>>>>> +
>>>>>>     	if (kfd_resume(kfd))
>>>>>>     		goto kfd_resume_error;
>>>>>> -- 
>>>>>> 2.25.1
>>>>>>
>>>>> Like I said last time, do not change the backport unless you HAVE to.
>>>>> You did it here again for no good reason :(
>>>> [JZ] Yes, I should add more explanation next time.
>>>>
>>>> Backport conflict fix to remove  svm_migrate_init((struct amdgpu_device
>>>> *)kfd->kgd);
>>>>
>>>> new AMD svm feature has not been added for 5.10 So it is safe to remove it.
>>> No, I am talking about the fact that you fixed up a coding style fix in
>>> this backport that is not in the original commit in Linus's tree.
>> [JZ] I see. this fix is not necessary. Do you want me to send v2 with
>>
>> this unnecessary coding style fix dropping for backport?
>>
> I took what was in Linus's tree already.  Please verify that what I
> applied to the queue still works.
[JZ] I verified it. It still works fine. Thanks for the correction!
>
> thanks,
>
> greg k-h

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2021-12-10 15:33 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-09 22:09 [PATCH 0/6] Bug:211277 fix backport for 5.10 stable James Zhu
2021-12-09 22:09 ` [PATCH 1/6] drm/amd/amdkfd: adjust dummy functions' placement James Zhu
2021-12-09 22:09 ` [PATCH 2/6] drm/amdkfd: separate kfd_iommu_resume from kfd_resume James Zhu
2021-12-09 22:09 ` [PATCH 3/6] drm/amdgpu: add amdgpu_amdkfd_resume_iommu James Zhu
2021-12-09 22:09 ` [PATCH 4/6] drm/amdgpu: move iommu_resume before ip init/resume James Zhu
2021-12-09 22:09 ` [PATCH 5/6] drm/amdgpu: init iommu after amdkfd device init James Zhu
2021-12-09 22:09 ` [PATCH 6/6] drm/amdkfd: fix boot failure when iommu is disabled in Picasso James Zhu
2021-12-10 13:33   ` Greg Kroah-Hartman
2021-12-10 14:14     ` James Zhu
2021-12-10 14:35       ` Greg Kroah-Hartman
2021-12-10 14:46         ` James Zhu
2021-12-10 15:12           ` Greg Kroah-Hartman
2021-12-10 15:33             ` James Zhu
2021-12-10  6:33 ` [PATCH 0/6] Bug:211277 fix backport for 5.10 stable Greg KH

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.