All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu: revert "fix system hang issue during GPU reset"
@ 2020-08-12 15:53 Christian König
  2020-08-12 15:55 ` Alex Deucher
  0 siblings, 1 reply; 4+ messages in thread
From: Christian König @ 2020-08-12 15:53 UTC (permalink / raw)
  To: amd-gfx

The whole approach wasn't thought through to the end.

We already had a reset lock like this in the past, and it caused the same problems as this one.

Completely revert the patch for now and add individual trylock protection to the hardware access functions as necessary.

This reverts commit edad8312cbbf9a33c86873fc4093664f150dd5c1.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h           |   9 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c    |  40 +-
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c    |   2 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c |   2 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c |   2 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |   2 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   7 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        |   4 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c       |   4 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c   |  14 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |  57 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |   4 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c       |   6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c       |  14 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c       |   4 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c        | 353 ++++--------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c       |   4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c       |   4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c     |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c      |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h      |   4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c      |  11 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h      |   3 +-
 drivers/gpu/drm/amd/amdgpu/atom.c             |   1 -
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c        |  10 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c         |   6 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c         |  10 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c        |   4 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c         |   2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c         |   2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c         |   7 +-
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c         |  13 +-
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c         |  13 +-
 .../drm/amd/amdkfd/kfd_device_queue_manager.c |  16 +-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c      |   4 -
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |   4 +-
 drivers/gpu/drm/amd/powerplay/amdgpu_smu.c    |   2 +-
 .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c    |   2 +-
 39 files changed, 184 insertions(+), 469 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 1f9d97f61aa5..9c6fb38ce59d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -952,9 +952,9 @@ struct amdgpu_device {
 	bool                            in_suspend;
 	bool				in_hibernate;
 
-	atomic_t                        in_gpu_reset;
+	bool                            in_gpu_reset;
 	enum pp_mp1_state               mp1_state;
-	struct rw_semaphore	reset_sem;
+	struct mutex  lock_reset;
 	struct amdgpu_doorbell_index doorbell_index;
 
 	struct mutex			notifier_lock;
@@ -1269,9 +1269,4 @@ static inline bool amdgpu_is_tmz(struct amdgpu_device *adev)
        return adev->gmc.tmz_enabled;
 }
 
-static inline bool amdgpu_in_reset(struct amdgpu_device *adev)
-{
-	return atomic_read(&adev->in_gpu_reset) ? true : false;
-}
-
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 9738dccb1c2c..0effc1d46824 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -244,14 +244,11 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 	if (cp_mqd_gfx9)
 		bp.flags |= AMDGPU_GEM_CREATE_CP_MQD_GFX9;
 
-	if (!down_read_trylock(&adev->reset_sem))
-		return -EIO;
-
 	r = amdgpu_bo_create(adev, &bp, &bo);
 	if (r) {
 		dev_err(adev->dev,
 			"failed to allocate BO for amdkfd (%d)\n", r);
-		goto err;
+		return r;
 	}
 
 	/* map the buffer */
@@ -286,7 +283,6 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 
 	amdgpu_bo_unreserve(bo);
 
-	up_read(&adev->reset_sem);
 	return 0;
 
 allocate_mem_kmap_bo_failed:
@@ -295,25 +291,19 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 	amdgpu_bo_unreserve(bo);
 allocate_mem_reserve_bo_failed:
 	amdgpu_bo_unref(&bo);
-err:
-	up_read(&adev->reset_sem);
+
 	return r;
 }
 
 void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 	struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;
 
-	down_read(&adev->reset_sem);
-
 	amdgpu_bo_reserve(bo, true);
 	amdgpu_bo_kunmap(bo);
 	amdgpu_bo_unpin(bo);
 	amdgpu_bo_unreserve(bo);
 	amdgpu_bo_unref(&(bo));
-
-	up_read(&adev->reset_sem);
 }
 
 int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size,
@@ -345,14 +335,9 @@ int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size,
 
 void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 	struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj;
 
-	down_read(&adev->reset_sem);
-
 	amdgpu_bo_unref(&bo);
-
-	up_read(&adev->reset_sem);
 }
 
 uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
@@ -626,15 +611,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
 	/* This works for NO_HWS. TODO: need to handle without knowing VMID */
 	job->vmid = vmid;
 
-	if (!down_read_trylock(&adev->reset_sem)) {
-		ret = -EIO;
-		goto err_ib_sched;
-	}
-
 	ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);
 
-	up_read(&adev->reset_sem);
-
 	if (ret) {
 		DRM_ERROR("amdgpu: failed to schedule IB.\n");
 		goto err_ib_sched;
@@ -670,9 +648,6 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 
-	if (!down_read_trylock(&adev->reset_sem))
-		return -EIO;
-
 	if (adev->family == AMDGPU_FAMILY_AI) {
 		int i;
 
@@ -682,8 +657,6 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
 		amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
 	}
 
-	up_read(&adev->reset_sem);
-
 	return 0;
 }
 
@@ -692,18 +665,11 @@ int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid)
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 	const uint32_t flush_type = 0;
 	bool all_hub = false;
-	int ret = -EIO;
 
 	if (adev->family == AMDGPU_FAMILY_AI)
 		all_hub = true;
 
-	if (down_read_trylock(&adev->reset_sem)) {
-		ret = amdgpu_gmc_flush_gpu_tlb_pasid(adev,
-					pasid, flush_type, all_hub);
-		up_read(&adev->reset_sem);
-	}
-
-	return ret;
+	return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
 }
 
 bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index b872cdb0b705..691c89705bcd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -543,7 +543,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
 	uint32_t temp;
 	struct v10_compute_mqd *m = get_mqd(mqd);
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EIO;
 
 #if 0
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 832a200bb62f..0b7e78748540 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -425,7 +425,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
 	unsigned long flags, end_jiffies;
 	int retry;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EIO;
 
 	acquire_queue(kgd, pipe_id, queue_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index d0940121a6a9..ccd635b812b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -421,7 +421,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
 	int retry;
 	struct vi_mqd *m = get_mqd(mqd);
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EIO;
 
 	acquire_queue(kgd, pipe_id, queue_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 7e11625b419e..961424bc7a1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -541,7 +541,7 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
 	uint32_t temp;
 	struct v9_mqd *m = get_mqd(mqd);
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EIO;
 
 	acquire_queue(kgd, pipe_id, queue_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 0d75726bd228..7e2394b50fbf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1194,9 +1194,6 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 		return -EINVAL;
 	}
 
-	if (!down_read_trylock(&adev->reset_sem))
-		return -EIO;
-
 	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
 	if (!*mem) {
 		ret = -ENOMEM;
@@ -1263,7 +1260,6 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 	if (offset)
 		*offset = amdgpu_bo_mmap_offset(bo);
 
-	up_read(&adev->reset_sem);
 	return 0;
 
 allocate_init_user_pages_failed:
@@ -1281,9 +1277,6 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 		sg_free_table(sg);
 		kfree(sg);
 	}
-
-	up_read(&adev->reset_sem);
-
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index a94b3f862fc2..ffbcaf4bfb8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1292,8 +1292,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	parser.adev = adev;
 	parser.filp = filp;
 
-	down_read(&adev->reset_sem);
-
 	r = amdgpu_cs_parser_init(&parser, data);
 	if (r) {
 		DRM_ERROR("Failed to initialize parser %d!\n", r);
@@ -1333,8 +1331,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 out:
 	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
 
-	up_read(&adev->reset_sem);
-
 	return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index d85d13f7a043..8842c55d4490 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -358,8 +358,6 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
 	if (atomic_read(&ctx->guilty))
 		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;
 
-	down_read(&adev->reset_sem);
-
 	/*query ue count*/
 	ras_counter = amdgpu_ras_query_error_count(adev, false);
 	/*ras counter is monotonic increasing*/
@@ -375,8 +373,6 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
 		ctx->ras_counter_ce = ras_counter;
 	}
 
-	up_read(&adev->reset_sem);
-
 	mutex_unlock(&mgr->lock);
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 0af249a1e35b..35fed75a4397 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -101,14 +101,14 @@ static int amdgpu_debugfs_autodump_open(struct inode *inode, struct file *file)
 
 	file->private_data = adev;
 
-	down_read(&adev->reset_sem);
+	mutex_lock(&adev->lock_reset);
 	if (adev->autodump.dumping.done) {
 		reinit_completion(&adev->autodump.dumping);
 		ret = 0;
 	} else {
 		ret = -EBUSY;
 	}
-	up_read(&adev->reset_sem);
+	mutex_unlock(&adev->lock_reset);
 
 	return ret;
 }
@@ -127,7 +127,7 @@ static unsigned int amdgpu_debugfs_autodump_poll(struct file *file, struct poll_
 
 	poll_wait(file, &adev->autodump.gpu_hang, poll_table);
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return POLLIN | POLLRDNORM | POLLWRNORM;
 
 	return 0;
@@ -1242,7 +1242,7 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
 	}
 
 	/* Avoid accidently unparking the sched thread during GPU reset */
-	down_read(&adev->reset_sem);
+	mutex_lock(&adev->lock_reset);
 
 	/* hold on the scheduler */
 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
@@ -1269,7 +1269,7 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
 		kthread_unpark(ring->sched.thread);
 	}
 
-	up_read(&adev->reset_sem);
+	mutex_unlock(&adev->lock_reset);
 
 	pm_runtime_mark_last_busy(dev->dev);
 	pm_runtime_put_autosuspend(dev->dev);
@@ -1459,7 +1459,7 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
 		return -ENOMEM;
 
 	/* Avoid accidently unparking the sched thread during GPU reset */
-	down_read(&adev->reset_sem);
+	mutex_lock(&adev->lock_reset);
 
 	/* stop the scheduler */
 	kthread_park(ring->sched.thread);
@@ -1500,7 +1500,7 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
 	/* restart the scheduler */
 	kthread_unpark(ring->sched.thread);
 
-	up_read(&adev->reset_sem);
+	mutex_unlock(&adev->lock_reset);
 
 	ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index fe8878761c29..19aa0d7334c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1940,7 +1940,7 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
 			if (adev->ip_blocks[i].status.hw == true)
 				break;
 
-			if (amdgpu_in_reset(adev) || adev->in_suspend) {
+			if (adev->in_gpu_reset || adev->in_suspend) {
 				r = adev->ip_blocks[i].version->funcs->resume(adev);
 				if (r) {
 					DRM_ERROR("resume of IP block <%s> failed %d\n",
@@ -2117,7 +2117,7 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
 			AMDGPU_RESET_MAGIC_NUM))
 		return true;
 
-	if (!amdgpu_in_reset(adev))
+	if (!adev->in_gpu_reset)
 		return false;
 
 	/*
@@ -3055,8 +3055,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	mutex_init(&adev->mn_lock);
 	mutex_init(&adev->virt.vf_errors.lock);
 	hash_init(adev->mn_hash);
-	init_rwsem(&adev->reset_sem);
-	atomic_set(&adev->in_gpu_reset, 0);
+	mutex_init(&adev->lock_reset);
 	mutex_init(&adev->psp.mutex);
 	mutex_init(&adev->notifier_lock);
 
@@ -4084,11 +4083,8 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
 		if (need_full_reset) {
 			/* post card */
-			if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context)) {
-				dev_warn(tmp_adev->dev, "asic atom init failed!");
-				r = -EAGAIN;
-				goto out;
-			}
+			if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
+				DRM_WARN("asic atom init failed!");
 
 			if (!r) {
 				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
@@ -4178,18 +4174,16 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
 	return r;
 }
 
-static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, struct amdgpu_hive_info *hive)
+static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
 {
-	if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
-		return false;
-
-	if (hive) {
-		down_write_nest_lock(&adev->reset_sem, &hive->hive_lock);
-	} else {
-		down_write(&adev->reset_sem);
-	}
+	if (trylock) {
+		if (!mutex_trylock(&adev->lock_reset))
+			return false;
+	} else
+		mutex_lock(&adev->lock_reset);
 
 	atomic_inc(&adev->gpu_reset_counter);
+	adev->in_gpu_reset = true;
 	switch (amdgpu_asic_reset_method(adev)) {
 	case AMD_RESET_METHOD_MODE1:
 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
@@ -4209,8 +4203,8 @@ static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
 {
 	amdgpu_vf_error_trans_all(adev);
 	adev->mp1_state = PP_MP1_STATE_NONE;
-	atomic_set(&adev->in_gpu_reset, 0);
-	up_write(&adev->reset_sem);
+	adev->in_gpu_reset = false;
+	mutex_unlock(&adev->lock_reset);
 }
 
 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
@@ -4320,14 +4314,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	 * We always reset all schedulers for device and all devices for XGMI
 	 * hive so that should take care of them too.
 	 */
-	hive = amdgpu_get_xgmi_hive(adev, false);
-	if (hive) {
-		if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) {
-			DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
-				job ? job->base.id : -1, hive->hive_id);
-			return 0;
-		}
-		mutex_lock(&hive->hive_lock);
+	hive = amdgpu_get_xgmi_hive(adev, true);
+	if (hive && !mutex_trylock(&hive->reset_lock)) {
+		DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
+			  job ? job->base.id : -1, hive->hive_id);
+		mutex_unlock(&hive->hive_lock);
+		return 0;
 	}
 
 	/*
@@ -4349,11 +4341,11 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 
 	/* block all schedulers and reset given job's ring */
 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
-		if (!amdgpu_device_lock_adev(tmp_adev, hive)) {
+		if (!amdgpu_device_lock_adev(tmp_adev, !hive)) {
 			DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
 				  job ? job->base.id : -1);
-			r = 0;
-			goto skip_recovery;
+			mutex_unlock(&hive->hive_lock);
+			return 0;
 		}
 
 		/*
@@ -4486,9 +4478,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 		amdgpu_device_unlock_adev(tmp_adev);
 	}
 
-skip_recovery:
 	if (hive) {
-		atomic_set(&hive->in_reset, 0);
+		mutex_unlock(&hive->reset_lock);
 		mutex_unlock(&hive->hive_lock);
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index ee1e8fff83b2..8c64d8d6cb82 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -670,8 +670,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 		bo_va = NULL;
 	}
 
-	down_read(&adev->reset_sem);
-
 	switch (args->operation) {
 	case AMDGPU_VA_OP_MAP:
 		va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
@@ -701,8 +699,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 		amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
 					args->operation);
 
-	up_read(&adev->reset_sem);
-
 error_backoff:
 	ttm_eu_backoff_reservation(&ticket, &list);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 8ccd17d02cc6..a819360a4b6a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -719,7 +719,7 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
 	 *
 	 * also don't wait anymore for IRQ context
 	 * */
-	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
+	if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
 		goto failed_kiq_read;
 
 	might_sleep();
@@ -777,7 +777,7 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
 	 *
 	 * also don't wait anymore for IRQ context
 	 * */
-	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
+	if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
 		goto failed_kiq_write;
 
 	might_sleep();
@@ -796,5 +796,5 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
 	amdgpu_ring_undo(ring);
 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
 failed_kiq_write:
-	dev_warn(adev->dev, "failed to write reg:%x\n", reg);
+	pr_err("failed to write reg:%x\n", reg);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 75d37dfb51aa..937029ad5271 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -220,17 +220,17 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
 
 	trace_amdgpu_sched_run_job(job);
 
-	if (down_read_trylock(&ring->adev->reset_sem)) {
+	if (job->vram_lost_counter != atomic_read(&ring->adev->vram_lost_counter))
+		dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if VRAM lost */
+
+	if (finished->error < 0) {
+		DRM_INFO("Skip scheduling IBs!\n");
+	} else {
 		r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,
-					&fence);
-		up_read(&ring->adev->reset_sem);
+				       &fence);
 		if (r)
 			DRM_ERROR("Error scheduling IBs (%d)\n", r);
-	} else {
-		dma_fence_set_error(finished, -ECANCELED);
-		DRM_INFO("Skip scheduling IBs!\n");
 	}
-
 	/* if gpu reset, hw fence will be replaced here */
 	dma_fence_put(job->fence);
 	job->fence = dma_fence_get(fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index f8de949d2510..b4a9e0478f25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1087,8 +1087,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 	if (!fpriv)
 		return;
 
-	down_read(&adev->reset_sem);
-
 	pm_runtime_get_sync(dev->dev);
 
 	if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_UVD) != NULL)
@@ -1127,8 +1125,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 
 	pm_runtime_mark_last_busy(dev->dev);
 	pm_runtime_put_autosuspend(dev->dev);
-
-	up_read(&adev->reset_sem);
 }
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 1705e328c6fc..65ad174bb976 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -163,7 +163,7 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev,
 	enum amd_pm_state_type pm;
 	int ret;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	ret = pm_runtime_get_sync(ddev->dev);
@@ -172,8 +172,6 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev,
 		return ret;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev)) {
 		if (adev->smu.ppt_funcs->get_current_power_state)
 			pm = smu_get_current_power_state(&adev->smu);
@@ -185,8 +183,6 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev,
 		pm = adev->pm.dpm.user_state;
 	}
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(ddev->dev);
 	pm_runtime_put_autosuspend(ddev->dev);
 
@@ -205,7 +201,7 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev,
 	enum amd_pm_state_type  state;
 	int ret;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	if (strncmp("battery", buf, strlen("battery")) == 0)
@@ -223,8 +219,6 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev,
 		return ret;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev)) {
 		mutex_lock(&adev->pm.mutex);
 		adev->pm.dpm.user_state = state;
@@ -238,9 +232,6 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev,
 
 		amdgpu_pm_compute_clocks(adev);
 	}
-
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(ddev->dev);
 	pm_runtime_put_autosuspend(ddev->dev);
 
@@ -316,7 +307,7 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev,
 	enum amd_dpm_forced_level level = 0xff;
 	int ret;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	ret = pm_runtime_get_sync(ddev->dev);
@@ -325,8 +316,6 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev,
 		return ret;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev))
 		level = smu_get_performance_level(&adev->smu);
 	else if (adev->powerplay.pp_funcs->get_performance_level)
@@ -334,8 +323,6 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev,
 	else
 		level = adev->pm.dpm.forced_level;
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(ddev->dev);
 	pm_runtime_put_autosuspend(ddev->dev);
 
@@ -362,7 +349,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
 	enum amd_dpm_forced_level current_level = 0xff;
 	int ret = 0;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	if (strncmp("low", buf, strlen("low")) == 0) {
@@ -393,8 +380,6 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
 		return ret;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev))
 		current_level = smu_get_performance_level(&adev->smu);
 	else if (adev->powerplay.pp_funcs->get_performance_level)
@@ -403,8 +388,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
 	if (current_level == level) {
 		pm_runtime_mark_last_busy(ddev->dev);
 		pm_runtime_put_autosuspend(ddev->dev);
-		ret = count;
-		goto pro_end;
+		return count;
 	}
 
 	if (adev->asic_type == CHIP_RAVEN) {
@@ -425,8 +409,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
 		pr_err("Currently not in any profile mode!\n");
 		pm_runtime_mark_last_busy(ddev->dev);
 		pm_runtime_put_autosuspend(ddev->dev);
-		ret = -EINVAL;
-		goto pro_end;
+		return -EINVAL;
 	}
 
 	if (is_support_sw_smu(adev)) {
@@ -434,8 +417,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
 		if (ret) {
 			pm_runtime_mark_last_busy(ddev->dev);
 			pm_runtime_put_autosuspend(ddev->dev);
-			ret = -EINVAL;
-			goto pro_end;
+			return -EINVAL;
 		}
 	} else if (adev->powerplay.pp_funcs->force_performance_level) {
 		mutex_lock(&adev->pm.mutex);
@@ -443,16 +425,14 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
 			mutex_unlock(&adev->pm.mutex);
 			pm_runtime_mark_last_busy(ddev->dev);
 			pm_runtime_put_autosuspend(ddev->dev);
-			ret = -EINVAL;
-			goto pro_end;
+			return -EINVAL;
 		}
 		ret = amdgpu_dpm_force_performance_level(adev, level);
 		if (ret) {
 			mutex_unlock(&adev->pm.mutex);
 			pm_runtime_mark_last_busy(ddev->dev);
 			pm_runtime_put_autosuspend(ddev->dev);
-			ret = -EINVAL;
-			goto pro_end;
+			return -EINVAL;
 		} else {
 			adev->pm.dpm.forced_level = level;
 		}
@@ -461,9 +441,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
 	pm_runtime_mark_last_busy(ddev->dev);
 	pm_runtime_put_autosuspend(ddev->dev);
 
-pro_end:
-	up_read(&adev->reset_sem);
-	return ret;
+	return count;
 }
 
 static ssize_t amdgpu_get_pp_num_states(struct device *dev,
@@ -475,7 +453,7 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,
 	struct pp_states_info data;
 	int i, buf_len, ret;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	ret = pm_runtime_get_sync(ddev->dev);
@@ -519,7 +497,7 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
 	enum amd_pm_state_type pm = 0;
 	int i = 0, ret = 0;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	ret = pm_runtime_get_sync(ddev->dev);
@@ -560,7 +538,7 @@ static ssize_t amdgpu_get_pp_force_state(struct device *dev,
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	if (adev->pp_force_state_enabled)
@@ -580,7 +558,7 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
 	unsigned long idx;
 	int ret;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	if (strlen(buf) == 1)
@@ -606,7 +584,6 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
 			return ret;
 		}
 
-		down_read(&adev->reset_sem);
 		/* only set user selected power states */
 		if (state != POWER_STATE_TYPE_INTERNAL_BOOT &&
 		    state != POWER_STATE_TYPE_DEFAULT) {
@@ -614,8 +591,6 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
 					AMD_PP_TASK_ENABLE_USER_STATE, &state);
 			adev->pp_force_state_enabled = true;
 		}
-		up_read(&adev->reset_sem);
-
 		pm_runtime_mark_last_busy(ddev->dev);
 		pm_runtime_put_autosuspend(ddev->dev);
 	}
@@ -643,7 +618,7 @@ static ssize_t amdgpu_get_pp_table(struct device *dev,
 	char *table = NULL;
 	int size, ret;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	ret = pm_runtime_get_sync(ddev->dev);
@@ -687,7 +662,7 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
 	struct amdgpu_device *adev = ddev->dev_private;
 	int ret = 0;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	ret = pm_runtime_get_sync(ddev->dev);
@@ -696,21 +671,16 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
 		return ret;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev)) {
 		ret = smu_sys_set_pp_table(&adev->smu, (void *)buf, count);
 		if (ret) {
 			pm_runtime_mark_last_busy(ddev->dev);
 			pm_runtime_put_autosuspend(ddev->dev);
-			up_read(&adev->reset_sem);
 			return ret;
 		}
 	} else if (adev->powerplay.pp_funcs->set_pp_table)
 		amdgpu_dpm_set_pp_table(adev, buf, count);
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(ddev->dev);
 	pm_runtime_put_autosuspend(ddev->dev);
 
@@ -845,7 +815,7 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
 	const char delimiter[3] = {' ', '\n', '\0'};
 	uint32_t type;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	if (count > 127)
@@ -889,10 +859,6 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
 		return ret;
 	}
 
-	ret = count;
-
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev)) {
 		ret = smu_od_edit_dpm_table(&adev->smu, type,
 					    parameter, parameter_size);
@@ -900,8 +866,7 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
 		if (ret) {
 			pm_runtime_mark_last_busy(ddev->dev);
 			pm_runtime_put_autosuspend(ddev->dev);
-			ret = -EINVAL;
-			goto pro_end;
+			return -EINVAL;
 		}
 	} else {
 		if (adev->powerplay.pp_funcs->odn_edit_dpm_table) {
@@ -910,8 +875,7 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
 			if (ret) {
 				pm_runtime_mark_last_busy(ddev->dev);
 				pm_runtime_put_autosuspend(ddev->dev);
-				ret = -EINVAL;
-				goto pro_end;
+				return -EINVAL;
 			}
 		}
 
@@ -922,22 +886,18 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
 						NULL);
 				pm_runtime_mark_last_busy(ddev->dev);
 				pm_runtime_put_autosuspend(ddev->dev);
-				ret = count;
-				goto pro_end;
+				return count;
 			} else {
 				pm_runtime_mark_last_busy(ddev->dev);
 				pm_runtime_put_autosuspend(ddev->dev);
-				ret = -EINVAL;
-				goto pro_end;
+				return -EINVAL;
 			}
 		}
 	}
 	pm_runtime_mark_last_busy(ddev->dev);
 	pm_runtime_put_autosuspend(ddev->dev);
 
-pro_end:
-	up_read(&adev->reset_sem);
-	return ret;
+	return count;
 }
 
 static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
@@ -949,7 +909,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
 	ssize_t size;
 	int ret;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	ret = pm_runtime_get_sync(ddev->dev);
@@ -1003,7 +963,7 @@ static ssize_t amdgpu_set_pp_features(struct device *dev,
 	uint64_t featuremask;
 	int ret;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	ret = kstrtou64(buf, 0, &featuremask);
@@ -1018,13 +978,11 @@ static ssize_t amdgpu_set_pp_features(struct device *dev,
 		return ret;
 	}
 
-	down_read(&adev->reset_sem);
 	if (is_support_sw_smu(adev)) {
 		ret = smu_sys_set_pp_feature_mask(&adev->smu, featuremask);
 		if (ret) {
 			pm_runtime_mark_last_busy(ddev->dev);
 			pm_runtime_put_autosuspend(ddev->dev);
-			up_read(&adev->reset_sem);
 			return -EINVAL;
 		}
 	} else if (adev->powerplay.pp_funcs->set_ppfeature_status) {
@@ -1032,12 +990,9 @@ static ssize_t amdgpu_set_pp_features(struct device *dev,
 		if (ret) {
 			pm_runtime_mark_last_busy(ddev->dev);
 			pm_runtime_put_autosuspend(ddev->dev);
-			up_read(&adev->reset_sem);
 			return -EINVAL;
 		}
 	}
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(ddev->dev);
 	pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1053,7 +1008,7 @@ static ssize_t amdgpu_get_pp_features(struct device *dev,
 	ssize_t size;
 	int ret;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	ret = pm_runtime_get_sync(ddev->dev);
@@ -1062,8 +1017,6 @@ static ssize_t amdgpu_get_pp_features(struct device *dev,
 		return ret;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev))
 		size = smu_sys_get_pp_feature_mask(&adev->smu, buf);
 	else if (adev->powerplay.pp_funcs->get_ppfeature_status)
@@ -1071,8 +1024,6 @@ static ssize_t amdgpu_get_pp_features(struct device *dev,
 	else
 		size = snprintf(buf, PAGE_SIZE, "\n");
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(ddev->dev);
 	pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1118,7 +1069,7 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
 	ssize_t size;
 	int ret;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	ret = pm_runtime_get_sync(ddev->dev);
@@ -1127,8 +1078,6 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
 		return ret;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev))
 		size = smu_print_clk_levels(&adev->smu, SMU_SCLK, buf);
 	else if (adev->powerplay.pp_funcs->print_clock_levels)
@@ -1136,8 +1085,6 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
 	else
 		size = snprintf(buf, PAGE_SIZE, "\n");
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(ddev->dev);
 	pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1190,7 +1137,7 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
 	int ret;
 	uint32_t mask = 0;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	ret = amdgpu_read_mask(buf, count, &mask);
@@ -1203,15 +1150,11 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
 		return ret;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev))
 		ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask);
 	else if (adev->powerplay.pp_funcs->force_clock_level)
 		ret = amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(ddev->dev);
 	pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1230,7 +1173,7 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,
 	ssize_t size;
 	int ret;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	ret = pm_runtime_get_sync(ddev->dev);
@@ -1239,8 +1182,6 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,
 		return ret;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev))
 		size = smu_print_clk_levels(&adev->smu, SMU_MCLK, buf);
 	else if (adev->powerplay.pp_funcs->print_clock_levels)
@@ -1248,8 +1189,6 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,
 	else
 		size = snprintf(buf, PAGE_SIZE, "\n");
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(ddev->dev);
 	pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1266,7 +1205,7 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
 	uint32_t mask = 0;
 	int ret;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	ret = amdgpu_read_mask(buf, count, &mask);
@@ -1279,15 +1218,11 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
 		return ret;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev))
 		ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask);
 	else if (adev->powerplay.pp_funcs->force_clock_level)
 		ret = amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(ddev->dev);
 	pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1306,7 +1241,7 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev,
 	ssize_t size;
 	int ret;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	ret = pm_runtime_get_sync(ddev->dev);
@@ -1315,8 +1250,6 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev,
 		return ret;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev))
 		size = smu_print_clk_levels(&adev->smu, SMU_SOCCLK, buf);
 	else if (adev->powerplay.pp_funcs->print_clock_levels)
@@ -1324,8 +1257,6 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev,
 	else
 		size = snprintf(buf, PAGE_SIZE, "\n");
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(ddev->dev);
 	pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1342,7 +1273,7 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev,
 	int ret;
 	uint32_t mask = 0;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	ret = amdgpu_read_mask(buf, count, &mask);
@@ -1355,8 +1286,6 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev,
 		return ret;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev))
 		ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask);
 	else if (adev->powerplay.pp_funcs->force_clock_level)
@@ -1364,8 +1293,6 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev,
 	else
 		ret = 0;
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(ddev->dev);
 	pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1384,7 +1311,7 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev,
 	ssize_t size;
 	int ret;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	ret = pm_runtime_get_sync(ddev->dev);
@@ -1393,8 +1320,6 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev,
 		return ret;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev))
 		size = smu_print_clk_levels(&adev->smu, SMU_FCLK, buf);
 	else if (adev->powerplay.pp_funcs->print_clock_levels)
@@ -1402,8 +1327,6 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev,
 	else
 		size = snprintf(buf, PAGE_SIZE, "\n");
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(ddev->dev);
 	pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1420,7 +1343,7 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev,
 	int ret;
 	uint32_t mask = 0;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	ret = amdgpu_read_mask(buf, count, &mask);
@@ -1433,8 +1356,6 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev,
 		return ret;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev))
 		ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask);
 	else if (adev->powerplay.pp_funcs->force_clock_level)
@@ -1442,8 +1363,6 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev,
 	else
 		ret = 0;
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(ddev->dev);
 	pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1462,7 +1381,7 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev,
 	ssize_t size;
 	int ret;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	ret = pm_runtime_get_sync(ddev->dev);
@@ -1471,8 +1390,6 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev,
 		return ret;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev))
 		size = smu_print_clk_levels(&adev->smu, SMU_DCEFCLK, buf);
 	else if (adev->powerplay.pp_funcs->print_clock_levels)
@@ -1480,8 +1397,6 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev,
 	else
 		size = snprintf(buf, PAGE_SIZE, "\n");
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(ddev->dev);
 	pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1498,7 +1413,7 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev,
 	int ret;
 	uint32_t mask = 0;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	ret = amdgpu_read_mask(buf, count, &mask);
@@ -1511,8 +1426,6 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev,
 		return ret;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev))
 		ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask);
 	else if (adev->powerplay.pp_funcs->force_clock_level)
@@ -1520,8 +1433,6 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev,
 	else
 		ret = 0;
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(ddev->dev);
 	pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1540,7 +1451,7 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
 	ssize_t size;
 	int ret;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	ret = pm_runtime_get_sync(ddev->dev);
@@ -1549,8 +1460,6 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
 		return ret;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev))
 		size = smu_print_clk_levels(&adev->smu, SMU_PCIE, buf);
 	else if (adev->powerplay.pp_funcs->print_clock_levels)
@@ -1558,8 +1467,6 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
 	else
 		size = snprintf(buf, PAGE_SIZE, "\n");
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(ddev->dev);
 	pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1576,7 +1483,7 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
 	int ret;
 	uint32_t mask = 0;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	ret = amdgpu_read_mask(buf, count, &mask);
@@ -1589,8 +1496,6 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
 		return ret;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev))
 		ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask);
 	else if (adev->powerplay.pp_funcs->force_clock_level)
@@ -1598,8 +1503,6 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
 	else
 		ret = 0;
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(ddev->dev);
 	pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1618,7 +1521,7 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
 	uint32_t value = 0;
 	int ret;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	ret = pm_runtime_get_sync(ddev->dev);
@@ -1627,15 +1530,11 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
 		return ret;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev))
 		value = smu_get_od_percentage(&(adev->smu), SMU_OD_SCLK);
 	else if (adev->powerplay.pp_funcs->get_sclk_od)
 		value = amdgpu_dpm_get_sclk_od(adev);
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(ddev->dev);
 	pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1652,7 +1551,7 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
 	int ret;
 	long int value;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	ret = kstrtol(buf, 0, &value);
@@ -1666,8 +1565,6 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
 		return ret;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev)) {
 		value = smu_set_od_percentage(&(adev->smu), SMU_OD_SCLK, (uint32_t)value);
 	} else {
@@ -1682,8 +1579,6 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
 		}
 	}
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(ddev->dev);
 	pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1699,7 +1594,7 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
 	uint32_t value = 0;
 	int ret;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	ret = pm_runtime_get_sync(ddev->dev);
@@ -1708,15 +1603,11 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
 		return ret;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev))
 		value = smu_get_od_percentage(&(adev->smu), SMU_OD_MCLK);
 	else if (adev->powerplay.pp_funcs->get_mclk_od)
 		value = amdgpu_dpm_get_mclk_od(adev);
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(ddev->dev);
 	pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1733,7 +1624,7 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
 	int ret;
 	long int value;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	ret = kstrtol(buf, 0, &value);
@@ -1747,8 +1638,6 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
 		return ret;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev)) {
 		value = smu_set_od_percentage(&(adev->smu), SMU_OD_MCLK, (uint32_t)value);
 	} else {
@@ -1763,8 +1652,6 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
 		}
 	}
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(ddev->dev);
 	pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1800,7 +1687,7 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
 	ssize_t size;
 	int ret;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	ret = pm_runtime_get_sync(ddev->dev);
@@ -1809,8 +1696,6 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
 		return ret;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev))
 		size = smu_get_power_profile_mode(&adev->smu, buf);
 	else if (adev->powerplay.pp_funcs->get_power_profile_mode)
@@ -1818,8 +1703,6 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
 	else
 		size = snprintf(buf, PAGE_SIZE, "\n");
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(ddev->dev);
 	pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1844,7 +1727,7 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
 	long int profile_mode = 0;
 	const char delimiter[3] = {' ', '\n', '\0'};
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	tmp[0] = *(buf);
@@ -1878,15 +1761,11 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
 		return ret;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev))
 		ret = smu_set_power_profile_mode(&adev->smu, parameter, parameter_size, true);
 	else if (adev->powerplay.pp_funcs->set_power_profile_mode)
 		ret = amdgpu_dpm_set_power_profile_mode(adev, parameter, parameter_size);
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(ddev->dev);
 	pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1912,7 +1791,7 @@ static ssize_t amdgpu_get_gpu_busy_percent(struct device *dev,
 	struct amdgpu_device *adev = ddev->dev_private;
 	int r, value, size = sizeof(value);
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	r = pm_runtime_get_sync(ddev->dev);
@@ -1921,11 +1800,9 @@ static ssize_t amdgpu_get_gpu_busy_percent(struct device *dev,
 		return r;
 	}
 
-	down_read(&adev->reset_sem);
 	/* read the IP busy sensor */
 	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD,
 				   (void *)&value, &size);
-	up_read(&adev->reset_sem);
 
 	pm_runtime_mark_last_busy(ddev->dev);
 	pm_runtime_put_autosuspend(ddev->dev);
@@ -1952,7 +1829,7 @@ static ssize_t amdgpu_get_mem_busy_percent(struct device *dev,
 	struct amdgpu_device *adev = ddev->dev_private;
 	int r, value, size = sizeof(value);
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	r = pm_runtime_get_sync(ddev->dev);
@@ -1961,14 +1838,10 @@ static ssize_t amdgpu_get_mem_busy_percent(struct device *dev,
 		return r;
 	}
 
-	down_read(&adev->reset_sem);
-
 	/* read the IP busy sensor */
 	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD,
 				   (void *)&value, &size);
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(ddev->dev);
 	pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1999,7 +1872,7 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
 	uint64_t count0 = 0, count1 = 0;
 	int ret;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	if (adev->flags & AMD_IS_APU)
@@ -2014,12 +1887,8 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
 		return ret;
 	}
 
-	down_read(&adev->reset_sem);
-
 	amdgpu_asic_get_pcie_usage(adev, &count0, &count1);
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(ddev->dev);
 	pm_runtime_put_autosuspend(ddev->dev);
 
@@ -2044,7 +1913,7 @@ static ssize_t amdgpu_get_unique_id(struct device *dev,
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	if (adev->unique_id)
@@ -2142,7 +2011,7 @@ static ssize_t amdgpu_get_gpu_metrics(struct device *dev,
 	ssize_t size = 0;
 	int ret;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	ret = pm_runtime_get_sync(ddev->dev);
@@ -2151,12 +2020,10 @@ static ssize_t amdgpu_get_gpu_metrics(struct device *dev,
 		return ret;
 	}
 
-	down_read(&adev->reset_sem);
 	if (is_support_sw_smu(adev))
 		size = smu_sys_get_gpu_metrics(&adev->smu, &gpu_metrics);
 	else if (adev->powerplay.pp_funcs->get_gpu_metrics)
 		size = amdgpu_dpm_get_gpu_metrics(adev, &gpu_metrics);
-	up_read(&adev->reset_sem);
 
 	if (size <= 0)
 		goto out;
@@ -2368,7 +2235,7 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
 	int channel = to_sensor_dev_attr(attr)->index;
 	int r, temp = 0, size = sizeof(temp);
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	if (channel >= PP_TEMP_MAX)
@@ -2380,8 +2247,6 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
 		return r;
 	}
 
-	down_read(&adev->reset_sem);
-
 	switch (channel) {
 	case PP_TEMP_JUNCTION:
 		/* get current junction temperature */
@@ -2403,8 +2268,6 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
 		break;
 	}
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
@@ -2508,7 +2371,7 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
 	u32 pwm_mode = 0;
 	int ret;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	ret = pm_runtime_get_sync(adev->ddev->dev);
@@ -2517,23 +2380,18 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
 		return ret;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev)) {
 		pwm_mode = smu_get_fan_control_mode(&adev->smu);
 	} else {
 		if (!adev->powerplay.pp_funcs->get_fan_control_mode) {
 			pm_runtime_mark_last_busy(adev->ddev->dev);
 			pm_runtime_put_autosuspend(adev->ddev->dev);
-			up_read(&adev->reset_sem);
 			return -EINVAL;
 		}
 
 		pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
 	}
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
@@ -2549,7 +2407,7 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
 	int err, ret;
 	int value;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	err = kstrtoint(buf, 10, &value);
@@ -2562,23 +2420,18 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
 		return ret;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev)) {
 		smu_set_fan_control_mode(&adev->smu, value);
 	} else {
 		if (!adev->powerplay.pp_funcs->set_fan_control_mode) {
 			pm_runtime_mark_last_busy(adev->ddev->dev);
 			pm_runtime_put_autosuspend(adev->ddev->dev);
-			up_read(&adev->reset_sem);
 			return -EINVAL;
 		}
 
 		amdgpu_dpm_set_fan_control_mode(adev, value);
 	}
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
@@ -2608,7 +2461,7 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
 	u32 value;
 	u32 pwm_mode;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	err = pm_runtime_get_sync(adev->ddev->dev);
@@ -2617,15 +2470,11 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
 		return err;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev))
 		pwm_mode = smu_get_fan_control_mode(&adev->smu);
 	else
 		pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
 
-	up_read(&adev->reset_sem);
-
 	if (pwm_mode != AMD_FAN_CTRL_MANUAL) {
 		pr_info("manual fan speed control should be enabled first\n");
 		pm_runtime_mark_last_busy(adev->ddev->dev);
@@ -2666,7 +2515,7 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
 	int err;
 	u32 speed = 0;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	err = pm_runtime_get_sync(adev->ddev->dev);
@@ -2675,8 +2524,6 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
 		return err;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev))
 		err = smu_get_fan_speed_percent(&adev->smu, &speed);
 	else if (adev->powerplay.pp_funcs->get_fan_speed_percent)
@@ -2684,8 +2531,6 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
 	else
 		err = -EINVAL;
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
@@ -2705,7 +2550,7 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
 	int err;
 	u32 speed = 0;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	err = pm_runtime_get_sync(adev->ddev->dev);
@@ -2714,8 +2559,6 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
 		return err;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev))
 		err = smu_get_fan_speed_rpm(&adev->smu, &speed);
 	else if (adev->powerplay.pp_funcs->get_fan_speed_rpm)
@@ -2723,8 +2566,6 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
 	else
 		err = -EINVAL;
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
@@ -2743,7 +2584,7 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev,
 	u32 size = sizeof(min_rpm);
 	int r;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	r = pm_runtime_get_sync(adev->ddev->dev);
@@ -2752,13 +2593,9 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev,
 		return r;
 	}
 
-	down_read(&adev->reset_sem);
-
 	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MIN_FAN_RPM,
 				   (void *)&min_rpm, &size);
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
@@ -2777,7 +2614,7 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev,
 	u32 size = sizeof(max_rpm);
 	int r;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	r = pm_runtime_get_sync(adev->ddev->dev);
@@ -2786,13 +2623,9 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev,
 		return r;
 	}
 
-	down_read(&adev->reset_sem);
-
 	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MAX_FAN_RPM,
 				   (void *)&max_rpm, &size);
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
@@ -2810,7 +2643,7 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
 	int err;
 	u32 rpm = 0;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	err = pm_runtime_get_sync(adev->ddev->dev);
@@ -2819,8 +2652,6 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
 		return err;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev))
 		err = smu_get_fan_speed_rpm(&adev->smu, &rpm);
 	else if (adev->powerplay.pp_funcs->get_fan_speed_rpm)
@@ -2828,8 +2659,6 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
 	else
 		err = -EINVAL;
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
@@ -2848,7 +2677,7 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
 	u32 value;
 	u32 pwm_mode;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	err = pm_runtime_get_sync(adev->ddev->dev);
@@ -2857,15 +2686,11 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
 		return err;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev))
 		pwm_mode = smu_get_fan_control_mode(&adev->smu);
 	else
 		pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
 
-	up_read(&adev->reset_sem);
-
 	if (pwm_mode != AMD_FAN_CTRL_MANUAL) {
 		pm_runtime_mark_last_busy(adev->ddev->dev);
 		pm_runtime_put_autosuspend(adev->ddev->dev);
@@ -2879,8 +2704,6 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
 		return err;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev))
 		err = smu_set_fan_speed_rpm(&adev->smu, value);
 	else if (adev->powerplay.pp_funcs->set_fan_speed_rpm)
@@ -2888,8 +2711,6 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
 	else
 		err = -EINVAL;
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
@@ -2907,7 +2728,7 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev,
 	u32 pwm_mode = 0;
 	int ret;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	ret = pm_runtime_get_sync(adev->ddev->dev);
@@ -2916,23 +2737,18 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev,
 		return ret;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev)) {
 		pwm_mode = smu_get_fan_control_mode(&adev->smu);
 	} else {
 		if (!adev->powerplay.pp_funcs->get_fan_control_mode) {
 			pm_runtime_mark_last_busy(adev->ddev->dev);
 			pm_runtime_put_autosuspend(adev->ddev->dev);
-			up_read(&adev->reset_sem);
 			return -EINVAL;
 		}
 
 		pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
 	}
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
@@ -2949,7 +2765,7 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
 	int value;
 	u32 pwm_mode;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	err = kstrtoint(buf, 10, &value);
@@ -2969,22 +2785,17 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
 		return err;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev)) {
 		smu_set_fan_control_mode(&adev->smu, pwm_mode);
 	} else {
 		if (!adev->powerplay.pp_funcs->set_fan_control_mode) {
 			pm_runtime_mark_last_busy(adev->ddev->dev);
 			pm_runtime_put_autosuspend(adev->ddev->dev);
-			up_read(&adev->reset_sem);
 			return -EINVAL;
 		}
 		amdgpu_dpm_set_fan_control_mode(adev, pwm_mode);
 	}
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
@@ -2999,7 +2810,7 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev,
 	u32 vddgfx;
 	int r, size = sizeof(vddgfx);
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	r = pm_runtime_get_sync(adev->ddev->dev);
@@ -3008,11 +2819,9 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev,
 		return r;
 	}
 
-	down_read(&adev->reset_sem);
 	/* get the voltage */
 	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX,
 				   (void *)&vddgfx, &size);
-	up_read(&adev->reset_sem);
 
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
@@ -3038,7 +2847,7 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev,
 	u32 vddnb;
 	int r, size = sizeof(vddnb);
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	/* only APUs have vddnb */
@@ -3051,11 +2860,9 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev,
 		return r;
 	}
 
-	down_read(&adev->reset_sem);
 	/* get the voltage */
 	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB,
 				   (void *)&vddnb, &size);
-	up_read(&adev->reset_sem);
 
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
@@ -3082,7 +2889,7 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
 	int r, size = sizeof(u32);
 	unsigned uw;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	r = pm_runtime_get_sync(adev->ddev->dev);
@@ -3091,11 +2898,9 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
 		return r;
 	}
 
-	down_read(&adev->reset_sem);
 	/* get the voltage */
 	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER,
 				   (void *)&query, &size);
-	up_read(&adev->reset_sem);
 
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
@@ -3125,7 +2930,7 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
 	ssize_t size;
 	int r;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	r = pm_runtime_get_sync(adev->ddev->dev);
@@ -3134,8 +2939,6 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
 		return r;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev)) {
 		smu_get_power_limit(&adev->smu, &limit, true);
 		size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
@@ -3146,8 +2949,6 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
 		size = snprintf(buf, PAGE_SIZE, "\n");
 	}
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
@@ -3163,7 +2964,7 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
 	ssize_t size;
 	int r;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	r = pm_runtime_get_sync(adev->ddev->dev);
@@ -3172,8 +2973,6 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
 		return r;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev)) {
 		smu_get_power_limit(&adev->smu, &limit, false);
 		size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
@@ -3184,8 +2983,6 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
 		size = snprintf(buf, PAGE_SIZE, "\n");
 	}
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
@@ -3202,7 +2999,7 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
 	int err;
 	u32 value;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	if (amdgpu_sriov_vf(adev))
@@ -3221,8 +3018,6 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
 		return err;
 	}
 
-	down_read(&adev->reset_sem);
-
 	if (is_support_sw_smu(adev))
 		err = smu_set_power_limit(&adev->smu, value);
 	else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit)
@@ -3230,8 +3025,6 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
 	else
 		err = -EINVAL;
 
-	up_read(&adev->reset_sem);
-
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
@@ -3249,7 +3042,7 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev,
 	uint32_t sclk;
 	int r, size = sizeof(sclk);
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	r = pm_runtime_get_sync(adev->ddev->dev);
@@ -3258,11 +3051,9 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev,
 		return r;
 	}
 
-	down_read(&adev->reset_sem);
 	/* get the sclk */
 	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK,
 				   (void *)&sclk, &size);
-	up_read(&adev->reset_sem);
 
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
@@ -3288,7 +3079,7 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev,
 	uint32_t mclk;
 	int r, size = sizeof(mclk);
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	r = pm_runtime_get_sync(adev->ddev->dev);
@@ -3297,11 +3088,9 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev,
 		return r;
 	}
 
-	down_read(&adev->reset_sem);
 	/* get the sclk */
 	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK,
 				   (void *)&mclk, &size);
-	up_read(&adev->reset_sem);
 
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
@@ -4188,7 +3977,7 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data)
 	u32 flags = 0;
 	int r;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EPERM;
 
 	r = pm_runtime_get_sync(dev->dev);
@@ -4204,7 +3993,6 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data)
 		return 0;
 	}
 
-	down_read(&adev->reset_sem);
 	if (!is_support_sw_smu(adev) &&
 	    adev->powerplay.pp_funcs->debugfs_print_current_performance_level) {
 		mutex_lock(&adev->pm.mutex);
@@ -4217,13 +4005,10 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data)
 	} else {
 		r = amdgpu_debugfs_pm_info_pp(m, adev);
 	}
-	up_read(&adev->reset_sem);
 	if (r)
 		goto out;
 
-	down_read(&adev->reset_sem);
 	amdgpu_device_ip_get_clockgating_state(adev, &flags);
-	up_read(&adev->reset_sem);
 
 	seq_printf(m, "Clock Gating Flags Mask: 0x%x\n", flags);
 	amdgpu_parse_cg_state(m, flags);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 116a89990f39..aa1e77c60c0a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -1869,7 +1869,7 @@ static int psp_load_smu_fw(struct psp_context *psp)
 		return 0;
 
 
-	if (amdgpu_in_reset(adev) && ras && ras->supported) {
+	if (adev->in_gpu_reset && ras && ras->supported) {
 		ret = amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_UNLOAD);
 		if (ret) {
 			DRM_WARN("Failed to set MP1 state prepare for reload\n");
@@ -1984,7 +1984,7 @@ static int psp_load_fw(struct amdgpu_device *adev)
 	int ret;
 	struct psp_context *psp = &adev->psp;
 
-	if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) {
+	if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset) {
 		psp_ring_stop(psp, PSP_RING_TYPE__KM); /* should not destroy ring, only stop */
 		goto skip_memalloc;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index cd1403f83dcf..f09082578865 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2079,7 +2079,7 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev,
 			amdgpu_ras_request_reset_on_boot(adev,
 					ras_block->block);
 			return 0;
-		} else if (adev->in_suspend || amdgpu_in_reset(adev)) {
+		} else if (adev->in_suspend || adev->in_gpu_reset) {
 			/* in resume phase, if fail to enable ras,
 			 * clean up all ras fs nodes, and disable ras */
 			goto cleanup;
@@ -2088,7 +2088,7 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev,
 	}
 
 	/* in resume phase, no need to create ras fs node */
-	if (adev->in_suspend || amdgpu_in_reset(adev))
+	if (adev->in_suspend || adev->in_gpu_reset)
 		return 0;
 
 	if (ih_info->cb) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 20fa0497aaa4..1e19d130473f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -2103,7 +2103,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
 	uint64_t size;
 	int r;
 
-	if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
+	if (!adev->mman.initialized || adev->in_gpu_reset ||
 	    adev->mman.buffer_funcs_enabled == enable)
 		return;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 039245c98ff8..183743c5fb7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -628,8 +628,7 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
 	struct amdgpu_firmware_info *ucode = NULL;
 
  /* for baremetal, the ucode is allocated in gtt, so don't need to fill the bo when reset/suspend */
-	if (!amdgpu_sriov_vf(adev) &&
-		(amdgpu_in_reset(adev) || adev->in_suspend))
+	if (!amdgpu_sriov_vf(adev) && (adev->in_gpu_reset || adev->in_suspend))
 		return 0;
 	/*
 	 * if SMU loaded firmware, it needn't add SMC, UVD, and VCE
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 1e211544f2dc..ae720a6dc5a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -93,7 +93,7 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
 	amdgpu_ring_undo(ring);
 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
 failed_kiq:
-	dev_warn(adev->dev, "failed to write reg %x wait reg %x\n", reg0, reg1);
+	pr_err("failed to write reg %x wait reg %x\n", reg0, reg1);
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index b2046c3a404d..f826945989c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -325,9 +325,9 @@ static inline bool is_virtual_machine(void)
 #define amdgpu_sriov_is_pp_one_vf(adev) \
 	((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF)
 #define amdgpu_sriov_is_debug(adev) \
-	((!amdgpu_in_reset(adev)) && adev->virt.tdr_debug)
+	((!adev->in_gpu_reset) && adev->virt.tdr_debug)
 #define amdgpu_sriov_is_normal(adev) \
-	((!amdgpu_in_reset(adev)) && (!adev->virt.tdr_debug))
+	((!adev->in_gpu_reset) && (!adev->virt.tdr_debug))
 
 bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
 void amdgpu_virt_init_setting(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 67a756f4337b..cd6e6eb7d966 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -372,7 +372,7 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lo
 	tmp->hive_id = adev->gmc.xgmi.hive_id;
 	INIT_LIST_HEAD(&tmp->device_list);
 	mutex_init(&tmp->hive_lock);
-	atomic_set(&tmp->in_reset, 0);
+	mutex_init(&tmp->reset_lock);
 	task_barrier_init(&tmp->tb);
 
 	if (lock)
@@ -397,7 +397,6 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
 						hive->hi_req_gpu : adev;
 	bool is_hi_req = pstate == AMDGPU_XGMI_PSTATE_MAX_VEGA20;
 	bool init_low = hive->pstate == AMDGPU_XGMI_PSTATE_UNKNOWN;
-	bool locked;
 
 	/* fw bug so temporarily disable pstate switching */
 	return 0;
@@ -405,9 +404,7 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
 	if (!hive || adev->asic_type != CHIP_VEGA20)
 		return 0;
 
-	locked = atomic_read(&hive->in_reset) ? false : true;
-	if (locked)
-		mutex_lock(&hive->hive_lock);
+	mutex_lock(&hive->hive_lock);
 
 	if (is_hi_req)
 		hive->hi_req_count++;
@@ -442,8 +439,7 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
 							adev : NULL;
 	}
 out:
-	if (locked)
-		mutex_unlock(&hive->hive_lock);
+	mutex_unlock(&hive->hive_lock);
 	return ret;
 }
 
@@ -598,6 +594,7 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
 	if(!(--hive->number_devices)){
 		amdgpu_xgmi_sysfs_destroy(adev, hive);
 		mutex_destroy(&hive->hive_lock);
+		mutex_destroy(&hive->reset_lock);
 	}
 
 	return psp_xgmi_terminate(&adev->psp);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index 61720cd4a1ee..6999eab16a72 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -30,8 +30,7 @@ struct amdgpu_hive_info {
 	uint64_t		hive_id;
 	struct list_head	device_list;
 	int number_devices;
-	struct mutex hive_lock;
-	atomic_t in_reset;
+	struct mutex hive_lock, reset_lock;
 	struct kobject *kobj;
 	struct device_attribute dev_attr;
 	struct amdgpu_device *adev;
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c
index 8341bd965202..4cfc786699c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.c
+++ b/drivers/gpu/drm/amd/amdgpu/atom.c
@@ -755,7 +755,6 @@ static void atom_op_jump(atom_exec_context *ctx, int *ptr, int arg)
 				/* jiffies wrap around we will just wait a little longer */
 				ctx->last_jump_jiffies = jiffies;
 			}
-			schedule();
 		} else {
 			ctx->last_jump = ctx->start + target;
 			ctx->last_jump_jiffies = jiffies;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index de6e6de41867..e87d43537013 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -6201,7 +6201,7 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
 	struct v10_gfx_mqd *mqd = ring->mqd_ptr;
 	int mqd_idx = ring - &adev->gfx.gfx_ring[0];
 
-	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+	if (!adev->in_gpu_reset && !adev->in_suspend) {
 		memset((void *)mqd, 0, sizeof(*mqd));
 		mutex_lock(&adev->srbm_mutex);
 		nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
@@ -6213,7 +6213,7 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
 		mutex_unlock(&adev->srbm_mutex);
 		if (adev->gfx.me.mqd_backup[mqd_idx])
 			memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
-	} else if (amdgpu_in_reset(adev)) {
+	} else if (adev->in_gpu_reset) {
 		/* reset mqd with the backup copy */
 		if (adev->gfx.me.mqd_backup[mqd_idx])
 			memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
@@ -6566,7 +6566,7 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring)
 
 	gfx_v10_0_kiq_setting(ring);
 
-	if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
+	if (adev->in_gpu_reset) { /* for GPU_RESET case */
 		/* reset MQD to a clean status */
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
@@ -6602,7 +6602,7 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
 	struct v10_compute_mqd *mqd = ring->mqd_ptr;
 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
 
-	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+	if (!adev->in_gpu_reset && !adev->in_suspend) {
 		memset((void *)mqd, 0, sizeof(*mqd));
 		mutex_lock(&adev->srbm_mutex);
 		nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
@@ -6612,7 +6612,7 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
 
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
-	} else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
+	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
 		/* reset MQD to a clean status */
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 7df567a6656d..14fd04b699da 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4633,7 +4633,7 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
 
 	gfx_v8_0_kiq_setting(ring);
 
-	if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
+	if (adev->in_gpu_reset) { /* for GPU_RESET case */
 		/* reset MQD to a clean status */
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
@@ -4670,7 +4670,7 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
 	struct vi_mqd *mqd = ring->mqd_ptr;
 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
 
-	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+	if (!adev->in_gpu_reset && !adev->in_suspend) {
 		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
 		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
 		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
@@ -4682,7 +4682,7 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
 
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
-	} else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
+	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
 		/* reset MQD to a clean status */
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 93c63ff3b35e..2c5bb282cc01 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3686,7 +3686,7 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
 
 	gfx_v9_0_kiq_setting(ring);
 
-	if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
+	if (adev->in_gpu_reset) { /* for GPU_RESET case */
 		/* reset MQD to a clean status */
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
@@ -3724,7 +3724,7 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
 	struct v9_mqd *mqd = ring->mqd_ptr;
 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
 
-	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+	if (!adev->in_gpu_reset && !adev->in_suspend) {
 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
@@ -3736,7 +3736,7 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
 
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
-	} else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
+	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
 		/* reset MQD to a clean status */
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
@@ -3930,7 +3930,7 @@ static int gfx_v9_0_hw_fini(void *handle)
 	/* Use deinitialize sequence from CAIL when unbinding device from driver,
 	 * otherwise KIQ is hanging when binding back
 	 */
-	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+	if (!adev->in_gpu_reset && !adev->in_suspend) {
 		mutex_lock(&adev->srbm_mutex);
 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
 				adev->gfx.kiq.ring.pipe,
@@ -4088,7 +4088,7 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
 	 *
 	 * also don't wait anymore for IRQ context
 	 * */
-	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
+	if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
 		goto failed_kiq_read;
 
 	might_sleep();
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 9d3b1245a339..ec8c0af39553 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -287,7 +287,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	 */
 	if (adev->gfx.kiq.ring.sched.ready &&
 	    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
-	    !amdgpu_in_reset(adev)) {
+	    !adev->in_gpu_reset) {
 
 		struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
 		const unsigned eng = 17;
@@ -312,7 +312,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 
 	if (!adev->mman.buffer_funcs_enabled ||
 	    !adev->ib_pool_ready ||
-	    amdgpu_in_reset(adev) ||
+	    adev->in_gpu_reset ||
 	    ring->sched.ready == false) {
 		gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0);
 		mutex_unlock(&adev->mman.gtt_window_lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 80c146df338a..3ce5c1d2fdf2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -434,7 +434,7 @@ static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 	int vmid;
 	unsigned int tmp;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EIO;
 
 	for (vmid = 1; vmid < 16; vmid++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 9ab65ca7df77..3e6615f9d39c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -635,7 +635,7 @@ static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 	int vmid;
 	unsigned int tmp;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EIO;
 
 	for (vmid = 1; vmid < 16; vmid++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 773ee11b3d17..6a780b674018 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -501,7 +501,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	 */
 	if (adev->gfx.kiq.ring.sched.ready &&
 			(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
-			!amdgpu_in_reset(adev)) {
+			!adev->in_gpu_reset) {
 		uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
 		uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
 
@@ -596,7 +596,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 
-	if (amdgpu_in_reset(adev))
+	if (adev->in_gpu_reset)
 		return -EIO;
 
 	if (ring->sched.ready) {
@@ -633,8 +633,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 		spin_unlock(&adev->gfx.kiq.ring_lock);
 		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
 		if (r < 1) {
-			dev_info(adev->dev,
-				"wait for kiq fence error: %ld\n", r);
+			DRM_ERROR("wait for kiq fence error: %ld.\n", r);
 			return -ETIME;
 		}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index fe31cbeccfe9..5fd67e1cc2a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -238,16 +238,20 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
 	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
 	struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
 	int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;
+	int locked;
 
 	/* block amdgpu_gpu_recover till msg FLR COMPLETE received,
 	 * otherwise the mailbox msg will be ruined/reseted by
 	 * the VF FLR.
 	 *
-	 * we can unlock the reset_sem to allow "amdgpu_job_timedout"
+	 * we can unlock the lock_reset to allow "amdgpu_job_timedout"
 	 * to run gpu_recover() after FLR_NOTIFICATION_CMPL received
 	 * which means host side had finished this VF's FLR.
 	 */
-	down_read(&adev->reset_sem);
+	locked = mutex_trylock(&adev->lock_reset);
+	if (locked)
+		adev->in_gpu_reset = true;
+
 	do {
 		if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
 			goto flr_done;
@@ -257,7 +261,10 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
 	} while (timeout > 1);
 
 flr_done:
-	up_read(&adev->reset_sem);
+	if (locked) {
+		adev->in_gpu_reset = false;
+		mutex_unlock(&adev->lock_reset);
+	}
 
 	/* Trigger recovery for world switch failure if no TDR */
 	if (amdgpu_device_should_recover_gpu(adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index 6f55172e8337..ce2bf1fb79ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -259,16 +259,20 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
 	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
 	struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
 	int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT;
+	int locked;
 
 	/* block amdgpu_gpu_recover till msg FLR COMPLETE received,
 	 * otherwise the mailbox msg will be ruined/reseted by
 	 * the VF FLR.
 	 *
-	 * we can unlock the reset_sem to allow "amdgpu_job_timedout"
+	 * we can unlock the lock_reset to allow "amdgpu_job_timedout"
 	 * to run gpu_recover() after FLR_NOTIFICATION_CMPL received
 	 * which means host side had finished this VF's FLR.
 	 */
-	down_read(&adev->reset_sem);
+	locked = mutex_trylock(&adev->lock_reset);
+	if (locked)
+		adev->in_gpu_reset = true;
+
 	do {
 		if (xgpu_nv_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
 			goto flr_done;
@@ -278,7 +282,10 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
 	} while (timeout > 1);
 
 flr_done:
-	up_read(&adev->reset_sem);
+	if (locked) {
+		adev->in_gpu_reset = false;
+		mutex_unlock(&adev->lock_reset);
+	}
 
 	/* Trigger recovery for world switch failure if no TDR */
 	if (amdgpu_device_should_recover_gpu(adev)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 7ad1537820b5..e0e60b0d0669 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -304,17 +304,15 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
 				struct qcm_process_device *qpd,
 				struct queue *q)
 {
-	if (!dqm->is_resetting) {
-		/* On GFX v7, CP doesn't flush TC at dequeue */
-		if (q->device->device_info->asic_family == CHIP_HAWAII)
-			if (flush_texture_cache_nocpsch(q->device, qpd))
-				pr_err("Failed to flush TC\n");
+	/* On GFX v7, CP doesn't flush TC at dequeue */
+	if (q->device->device_info->asic_family == CHIP_HAWAII)
+		if (flush_texture_cache_nocpsch(q->device, qpd))
+			pr_err("Failed to flush TC\n");
 
-		kfd_flush_tlb(qpd_to_pdd(qpd));
+	kfd_flush_tlb(qpd_to_pdd(qpd));
 
-		/* Release the vmid mapping */
-		set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
-	}
+	/* Release the vmid mapping */
+	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
 	dqm->vmid_pasid[qpd->vmid] = 0;
 
 	qpd->vmid = 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 71be897d4c2a..013c2b018edc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1551,10 +1551,6 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
 void kfd_flush_tlb(struct kfd_process_device *pdd)
 {
 	struct kfd_dev *dev = pdd->dev;
-	struct device_queue_manager *dqm = dev->dqm;
-
-	if (dqm->is_resetting)
-		return;
 
 	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
 		/* Nothing to flush until a VMID is assigned, which
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index ff5f7f7ceec6..c4daa22904da 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1658,7 +1658,7 @@ static int dm_suspend(void *handle)
 	struct amdgpu_display_manager *dm = &adev->dm;
 	int ret = 0;
 
-	if (amdgpu_in_reset(adev)) {
+	if (adev->in_gpu_reset) {
 		mutex_lock(&dm->dc_lock);
 		dm->cached_dc_state = dc_copy_state(dm->dc->current_state);
 
@@ -1844,7 +1844,7 @@ static int dm_resume(void *handle)
 	struct dc_state *dc_state;
 	int i, r, j;
 
-	if (amdgpu_in_reset(adev)) {
+	if (adev->in_gpu_reset) {
 		dc_state = dm->cached_dc_state;
 
 		r = dm_dmub_hw_init(adev);
diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index 1ffacc712e53..c8e30d59e658 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -1110,7 +1110,7 @@ static int smu_disable_dpms(struct smu_context *smu)
 	struct amdgpu_device *adev = smu->adev;
 	int ret = 0;
 	bool use_baco = !smu->is_apu &&
-		((amdgpu_in_reset(adev) &&
+		((adev->in_gpu_reset &&
 		  (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)) ||
 		 ((adev->in_runpm || adev->in_hibernate) && amdgpu_asic_supports_baco(adev)));
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index da84012b7fd5..c7216362b68d 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -489,7 +489,7 @@ static int vega20_setup_asic_task(struct pp_hwmgr *hwmgr)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
 	int ret = 0;
-	bool use_baco = (amdgpu_in_reset(adev) &&
+	bool use_baco = (adev->in_gpu_reset &&
 			 (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)) ||
 		(adev->in_runpm && amdgpu_asic_supports_baco(adev));
 
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] drm/amdgpu: revert "fix system hang issue during GPU reset"
  2020-08-12 15:53 [PATCH] drm/amdgpu: revert "fix system hang issue during GPU reset" Christian König
@ 2020-08-12 15:55 ` Alex Deucher
  2020-08-13 10:58   ` Christian König
  2020-08-14  1:23   ` Matt Coffin
  0 siblings, 2 replies; 4+ messages in thread
From: Alex Deucher @ 2020-08-12 15:55 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx list

On Wed, Aug 12, 2020 at 11:54 AM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> The whole approach wasn't thought through to the end.
>
> We already had a reset lock like this in the past, and it caused the same problems as this one.
>
> Completely revert the patch for now and add individual trylock protection to the hardware access functions as necessary.
>
> This reverts commit edad8312cbbf9a33c86873fc4093664f150dd5c1.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>

This also broke GPU overclocking.

Acked-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h           |   9 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c    |  40 +-
>  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c    |   2 +-
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c |   2 +-
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c |   2 +-
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |   2 +-
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   7 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        |   4 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c       |   4 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c   |  14 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |  57 ++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |   4 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c       |   6 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c       |  14 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c       |   4 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c        | 353 ++++--------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c       |   4 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c       |   4 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       |   2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c     |   3 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c      |   2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h      |   4 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c      |  11 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h      |   3 +-
>  drivers/gpu/drm/amd/amdgpu/atom.c             |   1 -
>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c        |  10 +-
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c         |   6 +-
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c         |  10 +-
>  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c        |   4 +-
>  drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c         |   2 +-
>  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c         |   2 +-
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c         |   7 +-
>  drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c         |  13 +-
>  drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c         |  13 +-
>  .../drm/amd/amdkfd/kfd_device_queue_manager.c |  16 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c      |   4 -
>  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |   4 +-
>  drivers/gpu/drm/amd/powerplay/amdgpu_smu.c    |   2 +-
>  .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c    |   2 +-
>  39 files changed, 184 insertions(+), 469 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 1f9d97f61aa5..9c6fb38ce59d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -952,9 +952,9 @@ struct amdgpu_device {
>         bool                            in_suspend;
>         bool                            in_hibernate;
>
> -       atomic_t                        in_gpu_reset;
> +       bool                            in_gpu_reset;
>         enum pp_mp1_state               mp1_state;
> -       struct rw_semaphore     reset_sem;
> +       struct mutex  lock_reset;
>         struct amdgpu_doorbell_index doorbell_index;
>
>         struct mutex                    notifier_lock;
> @@ -1269,9 +1269,4 @@ static inline bool amdgpu_is_tmz(struct amdgpu_device *adev)
>         return adev->gmc.tmz_enabled;
>  }
>
> -static inline bool amdgpu_in_reset(struct amdgpu_device *adev)
> -{
> -       return atomic_read(&adev->in_gpu_reset) ? true : false;
> -}
> -
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index 9738dccb1c2c..0effc1d46824 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -244,14 +244,11 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
>         if (cp_mqd_gfx9)
>                 bp.flags |= AMDGPU_GEM_CREATE_CP_MQD_GFX9;
>
> -       if (!down_read_trylock(&adev->reset_sem))
> -               return -EIO;
> -
>         r = amdgpu_bo_create(adev, &bp, &bo);
>         if (r) {
>                 dev_err(adev->dev,
>                         "failed to allocate BO for amdkfd (%d)\n", r);
> -               goto err;
> +               return r;
>         }
>
>         /* map the buffer */
> @@ -286,7 +283,6 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
>
>         amdgpu_bo_unreserve(bo);
>
> -       up_read(&adev->reset_sem);
>         return 0;
>
>  allocate_mem_kmap_bo_failed:
> @@ -295,25 +291,19 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
>         amdgpu_bo_unreserve(bo);
>  allocate_mem_reserve_bo_failed:
>         amdgpu_bo_unref(&bo);
> -err:
> -       up_read(&adev->reset_sem);
> +
>         return r;
>  }
>
>  void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
>  {
> -       struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
>         struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;
>
> -       down_read(&adev->reset_sem);
> -
>         amdgpu_bo_reserve(bo, true);
>         amdgpu_bo_kunmap(bo);
>         amdgpu_bo_unpin(bo);
>         amdgpu_bo_unreserve(bo);
>         amdgpu_bo_unref(&(bo));
> -
> -       up_read(&adev->reset_sem);
>  }
>
>  int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size,
> @@ -345,14 +335,9 @@ int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size,
>
>  void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj)
>  {
> -       struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
>         struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj;
>
> -       down_read(&adev->reset_sem);
> -
>         amdgpu_bo_unref(&bo);
> -
> -       up_read(&adev->reset_sem);
>  }
>
>  uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
> @@ -626,15 +611,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
>         /* This works for NO_HWS. TODO: need to handle without knowing VMID */
>         job->vmid = vmid;
>
> -       if (!down_read_trylock(&adev->reset_sem)) {
> -               ret = -EIO;
> -               goto err_ib_sched;
> -       }
> -
>         ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);
>
> -       up_read(&adev->reset_sem);
> -
>         if (ret) {
>                 DRM_ERROR("amdgpu: failed to schedule IB.\n");
>                 goto err_ib_sched;
> @@ -670,9 +648,6 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
>  {
>         struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
>
> -       if (!down_read_trylock(&adev->reset_sem))
> -               return -EIO;
> -
>         if (adev->family == AMDGPU_FAMILY_AI) {
>                 int i;
>
> @@ -682,8 +657,6 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
>                 amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
>         }
>
> -       up_read(&adev->reset_sem);
> -
>         return 0;
>  }
>
> @@ -692,18 +665,11 @@ int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid)
>         struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
>         const uint32_t flush_type = 0;
>         bool all_hub = false;
> -       int ret = -EIO;
>
>         if (adev->family == AMDGPU_FAMILY_AI)
>                 all_hub = true;
>
> -       if (down_read_trylock(&adev->reset_sem)) {
> -               ret = amdgpu_gmc_flush_gpu_tlb_pasid(adev,
> -                                       pasid, flush_type, all_hub);
> -               up_read(&adev->reset_sem);
> -       }
> -
> -       return ret;
> +       return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
>  }
>
>  bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> index b872cdb0b705..691c89705bcd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> @@ -543,7 +543,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
>         uint32_t temp;
>         struct v10_compute_mqd *m = get_mqd(mqd);
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EIO;
>
>  #if 0
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> index 832a200bb62f..0b7e78748540 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> @@ -425,7 +425,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
>         unsigned long flags, end_jiffies;
>         int retry;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EIO;
>
>         acquire_queue(kgd, pipe_id, queue_id);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> index d0940121a6a9..ccd635b812b5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> @@ -421,7 +421,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
>         int retry;
>         struct vi_mqd *m = get_mqd(mqd);
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EIO;
>
>         acquire_queue(kgd, pipe_id, queue_id);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index 7e11625b419e..961424bc7a1f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -541,7 +541,7 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
>         uint32_t temp;
>         struct v9_mqd *m = get_mqd(mqd);
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EIO;
>
>         acquire_queue(kgd, pipe_id, queue_id);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 0d75726bd228..7e2394b50fbf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -1194,9 +1194,6 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>                 return -EINVAL;
>         }
>
> -       if (!down_read_trylock(&adev->reset_sem))
> -               return -EIO;
> -
>         *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
>         if (!*mem) {
>                 ret = -ENOMEM;
> @@ -1263,7 +1260,6 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>         if (offset)
>                 *offset = amdgpu_bo_mmap_offset(bo);
>
> -       up_read(&adev->reset_sem);
>         return 0;
>
>  allocate_init_user_pages_failed:
> @@ -1281,9 +1277,6 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>                 sg_free_table(sg);
>                 kfree(sg);
>         }
> -
> -       up_read(&adev->reset_sem);
> -
>         return ret;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index a94b3f862fc2..ffbcaf4bfb8b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -1292,8 +1292,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>         parser.adev = adev;
>         parser.filp = filp;
>
> -       down_read(&adev->reset_sem);
> -
>         r = amdgpu_cs_parser_init(&parser, data);
>         if (r) {
>                 DRM_ERROR("Failed to initialize parser %d!\n", r);
> @@ -1333,8 +1331,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>  out:
>         amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
>
> -       up_read(&adev->reset_sem);
> -
>         return r;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index d85d13f7a043..8842c55d4490 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -358,8 +358,6 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
>         if (atomic_read(&ctx->guilty))
>                 out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;
>
> -       down_read(&adev->reset_sem);
> -
>         /*query ue count*/
>         ras_counter = amdgpu_ras_query_error_count(adev, false);
>         /*ras counter is monotonic increasing*/
> @@ -375,8 +373,6 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
>                 ctx->ras_counter_ce = ras_counter;
>         }
>
> -       up_read(&adev->reset_sem);
> -
>         mutex_unlock(&mgr->lock);
>         return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index 0af249a1e35b..35fed75a4397 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -101,14 +101,14 @@ static int amdgpu_debugfs_autodump_open(struct inode *inode, struct file *file)
>
>         file->private_data = adev;
>
> -       down_read(&adev->reset_sem);
> +       mutex_lock(&adev->lock_reset);
>         if (adev->autodump.dumping.done) {
>                 reinit_completion(&adev->autodump.dumping);
>                 ret = 0;
>         } else {
>                 ret = -EBUSY;
>         }
> -       up_read(&adev->reset_sem);
> +       mutex_unlock(&adev->lock_reset);
>
>         return ret;
>  }
> @@ -127,7 +127,7 @@ static unsigned int amdgpu_debugfs_autodump_poll(struct file *file, struct poll_
>
>         poll_wait(file, &adev->autodump.gpu_hang, poll_table);
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return POLLIN | POLLRDNORM | POLLWRNORM;
>
>         return 0;
> @@ -1242,7 +1242,7 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
>         }
>
>         /* Avoid accidently unparking the sched thread during GPU reset */
> -       down_read(&adev->reset_sem);
> +       mutex_lock(&adev->lock_reset);
>
>         /* hold on the scheduler */
>         for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
> @@ -1269,7 +1269,7 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
>                 kthread_unpark(ring->sched.thread);
>         }
>
> -       up_read(&adev->reset_sem);
> +       mutex_unlock(&adev->lock_reset);
>
>         pm_runtime_mark_last_busy(dev->dev);
>         pm_runtime_put_autosuspend(dev->dev);
> @@ -1459,7 +1459,7 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
>                 return -ENOMEM;
>
>         /* Avoid accidently unparking the sched thread during GPU reset */
> -       down_read(&adev->reset_sem);
> +       mutex_lock(&adev->lock_reset);
>
>         /* stop the scheduler */
>         kthread_park(ring->sched.thread);
> @@ -1500,7 +1500,7 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
>         /* restart the scheduler */
>         kthread_unpark(ring->sched.thread);
>
> -       up_read(&adev->reset_sem);
> +       mutex_unlock(&adev->lock_reset);
>
>         ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index fe8878761c29..19aa0d7334c7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -1940,7 +1940,7 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
>                         if (adev->ip_blocks[i].status.hw == true)
>                                 break;
>
> -                       if (amdgpu_in_reset(adev) || adev->in_suspend) {
> +                       if (adev->in_gpu_reset || adev->in_suspend) {
>                                 r = adev->ip_blocks[i].version->funcs->resume(adev);
>                                 if (r) {
>                                         DRM_ERROR("resume of IP block <%s> failed %d\n",
> @@ -2117,7 +2117,7 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
>                         AMDGPU_RESET_MAGIC_NUM))
>                 return true;
>
> -       if (!amdgpu_in_reset(adev))
> +       if (!adev->in_gpu_reset)
>                 return false;
>
>         /*
> @@ -3055,8 +3055,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>         mutex_init(&adev->mn_lock);
>         mutex_init(&adev->virt.vf_errors.lock);
>         hash_init(adev->mn_hash);
> -       init_rwsem(&adev->reset_sem);
> -       atomic_set(&adev->in_gpu_reset, 0);
> +       mutex_init(&adev->lock_reset);
>         mutex_init(&adev->psp.mutex);
>         mutex_init(&adev->notifier_lock);
>
> @@ -4084,11 +4083,8 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
>         list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
>                 if (need_full_reset) {
>                         /* post card */
> -                       if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context)) {
> -                               dev_warn(tmp_adev->dev, "asic atom init failed!");
> -                               r = -EAGAIN;
> -                               goto out;
> -                       }
> +                       if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
> +                               DRM_WARN("asic atom init failed!");
>
>                         if (!r) {
>                                 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
> @@ -4178,18 +4174,16 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
>         return r;
>  }
>
> -static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, struct amdgpu_hive_info *hive)
> +static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
>  {
> -       if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
> -               return false;
> -
> -       if (hive) {
> -               down_write_nest_lock(&adev->reset_sem, &hive->hive_lock);
> -       } else {
> -               down_write(&adev->reset_sem);
> -       }
> +       if (trylock) {
> +               if (!mutex_trylock(&adev->lock_reset))
> +                       return false;
> +       } else
> +               mutex_lock(&adev->lock_reset);
>
>         atomic_inc(&adev->gpu_reset_counter);
> +       adev->in_gpu_reset = true;
>         switch (amdgpu_asic_reset_method(adev)) {
>         case AMD_RESET_METHOD_MODE1:
>                 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
> @@ -4209,8 +4203,8 @@ static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
>  {
>         amdgpu_vf_error_trans_all(adev);
>         adev->mp1_state = PP_MP1_STATE_NONE;
> -       atomic_set(&adev->in_gpu_reset, 0);
> -       up_write(&adev->reset_sem);
> +       adev->in_gpu_reset = false;
> +       mutex_unlock(&adev->lock_reset);
>  }
>
>  static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
> @@ -4320,14 +4314,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>          * We always reset all schedulers for device and all devices for XGMI
>          * hive so that should take care of them too.
>          */
> -       hive = amdgpu_get_xgmi_hive(adev, false);
> -       if (hive) {
> -               if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) {
> -                       DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
> -                               job ? job->base.id : -1, hive->hive_id);
> -                       return 0;
> -               }
> -               mutex_lock(&hive->hive_lock);
> +       hive = amdgpu_get_xgmi_hive(adev, true);
> +       if (hive && !mutex_trylock(&hive->reset_lock)) {
> +               DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
> +                         job ? job->base.id : -1, hive->hive_id);
> +               mutex_unlock(&hive->hive_lock);
> +               return 0;
>         }
>
>         /*
> @@ -4349,11 +4341,11 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>
>         /* block all schedulers and reset given job's ring */
>         list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
> -               if (!amdgpu_device_lock_adev(tmp_adev, hive)) {
> +               if (!amdgpu_device_lock_adev(tmp_adev, !hive)) {
>                         DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
>                                   job ? job->base.id : -1);
> -                       r = 0;
> -                       goto skip_recovery;
> +                       mutex_unlock(&hive->hive_lock);
> +                       return 0;
>                 }
>
>                 /*
> @@ -4486,9 +4478,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>                 amdgpu_device_unlock_adev(tmp_adev);
>         }
>
> -skip_recovery:
>         if (hive) {
> -               atomic_set(&hive->in_reset, 0);
> +               mutex_unlock(&hive->reset_lock);
>                 mutex_unlock(&hive->hive_lock);
>         }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index ee1e8fff83b2..8c64d8d6cb82 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -670,8 +670,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>                 bo_va = NULL;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         switch (args->operation) {
>         case AMDGPU_VA_OP_MAP:
>                 va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
> @@ -701,8 +699,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>                 amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
>                                         args->operation);
>
> -       up_read(&adev->reset_sem);
> -
>  error_backoff:
>         ttm_eu_backoff_reservation(&ticket, &list);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 8ccd17d02cc6..a819360a4b6a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -719,7 +719,7 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
>          *
>          * also don't wait anymore for IRQ context
>          * */
> -       if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
> +       if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
>                 goto failed_kiq_read;
>
>         might_sleep();
> @@ -777,7 +777,7 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
>          *
>          * also don't wait anymore for IRQ context
>          * */
> -       if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
> +       if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
>                 goto failed_kiq_write;
>
>         might_sleep();
> @@ -796,5 +796,5 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
>         amdgpu_ring_undo(ring);
>         spin_unlock_irqrestore(&kiq->ring_lock, flags);
>  failed_kiq_write:
> -       dev_warn(adev->dev, "failed to write reg:%x\n", reg);
> +       pr_err("failed to write reg:%x\n", reg);
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 75d37dfb51aa..937029ad5271 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -220,17 +220,17 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
>
>         trace_amdgpu_sched_run_job(job);
>
> -       if (down_read_trylock(&ring->adev->reset_sem)) {
> +       if (job->vram_lost_counter != atomic_read(&ring->adev->vram_lost_counter))
> +               dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if VRAM lost */
> +
> +       if (finished->error < 0) {
> +               DRM_INFO("Skip scheduling IBs!\n");
> +       } else {
>                 r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,
> -                                       &fence);
> -               up_read(&ring->adev->reset_sem);
> +                                      &fence);
>                 if (r)
>                         DRM_ERROR("Error scheduling IBs (%d)\n", r);
> -       } else {
> -               dma_fence_set_error(finished, -ECANCELED);
> -               DRM_INFO("Skip scheduling IBs!\n");
>         }
> -
>         /* if gpu reset, hw fence will be replaced here */
>         dma_fence_put(job->fence);
>         job->fence = dma_fence_get(fence);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index f8de949d2510..b4a9e0478f25 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -1087,8 +1087,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
>         if (!fpriv)
>                 return;
>
> -       down_read(&adev->reset_sem);
> -
>         pm_runtime_get_sync(dev->dev);
>
>         if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_UVD) != NULL)
> @@ -1127,8 +1125,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
>
>         pm_runtime_mark_last_busy(dev->dev);
>         pm_runtime_put_autosuspend(dev->dev);
> -
> -       up_read(&adev->reset_sem);
>  }
>
>  /*
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> index 1705e328c6fc..65ad174bb976 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> @@ -163,7 +163,7 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev,
>         enum amd_pm_state_type pm;
>         int ret;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         ret = pm_runtime_get_sync(ddev->dev);
> @@ -172,8 +172,6 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev,
>                 return ret;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev)) {
>                 if (adev->smu.ppt_funcs->get_current_power_state)
>                         pm = smu_get_current_power_state(&adev->smu);
> @@ -185,8 +183,6 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev,
>                 pm = adev->pm.dpm.user_state;
>         }
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(ddev->dev);
>         pm_runtime_put_autosuspend(ddev->dev);
>
> @@ -205,7 +201,7 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev,
>         enum amd_pm_state_type  state;
>         int ret;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         if (strncmp("battery", buf, strlen("battery")) == 0)
> @@ -223,8 +219,6 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev,
>                 return ret;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev)) {
>                 mutex_lock(&adev->pm.mutex);
>                 adev->pm.dpm.user_state = state;
> @@ -238,9 +232,6 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev,
>
>                 amdgpu_pm_compute_clocks(adev);
>         }
> -
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(ddev->dev);
>         pm_runtime_put_autosuspend(ddev->dev);
>
> @@ -316,7 +307,7 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev,
>         enum amd_dpm_forced_level level = 0xff;
>         int ret;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         ret = pm_runtime_get_sync(ddev->dev);
> @@ -325,8 +316,6 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev,
>                 return ret;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev))
>                 level = smu_get_performance_level(&adev->smu);
>         else if (adev->powerplay.pp_funcs->get_performance_level)
> @@ -334,8 +323,6 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev,
>         else
>                 level = adev->pm.dpm.forced_level;
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(ddev->dev);
>         pm_runtime_put_autosuspend(ddev->dev);
>
> @@ -362,7 +349,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
>         enum amd_dpm_forced_level current_level = 0xff;
>         int ret = 0;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         if (strncmp("low", buf, strlen("low")) == 0) {
> @@ -393,8 +380,6 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
>                 return ret;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev))
>                 current_level = smu_get_performance_level(&adev->smu);
>         else if (adev->powerplay.pp_funcs->get_performance_level)
> @@ -403,8 +388,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
>         if (current_level == level) {
>                 pm_runtime_mark_last_busy(ddev->dev);
>                 pm_runtime_put_autosuspend(ddev->dev);
> -               ret = count;
> -               goto pro_end;
> +               return count;
>         }
>
>         if (adev->asic_type == CHIP_RAVEN) {
> @@ -425,8 +409,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
>                 pr_err("Currently not in any profile mode!\n");
>                 pm_runtime_mark_last_busy(ddev->dev);
>                 pm_runtime_put_autosuspend(ddev->dev);
> -               ret = -EINVAL;
> -               goto pro_end;
> +               return -EINVAL;
>         }
>
>         if (is_support_sw_smu(adev)) {
> @@ -434,8 +417,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
>                 if (ret) {
>                         pm_runtime_mark_last_busy(ddev->dev);
>                         pm_runtime_put_autosuspend(ddev->dev);
> -                       ret = -EINVAL;
> -                       goto pro_end;
> +                       return -EINVAL;
>                 }
>         } else if (adev->powerplay.pp_funcs->force_performance_level) {
>                 mutex_lock(&adev->pm.mutex);
> @@ -443,16 +425,14 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
>                         mutex_unlock(&adev->pm.mutex);
>                         pm_runtime_mark_last_busy(ddev->dev);
>                         pm_runtime_put_autosuspend(ddev->dev);
> -                       ret = -EINVAL;
> -                       goto pro_end;
> +                       return -EINVAL;
>                 }
>                 ret = amdgpu_dpm_force_performance_level(adev, level);
>                 if (ret) {
>                         mutex_unlock(&adev->pm.mutex);
>                         pm_runtime_mark_last_busy(ddev->dev);
>                         pm_runtime_put_autosuspend(ddev->dev);
> -                       ret = -EINVAL;
> -                       goto pro_end;
> +                       return -EINVAL;
>                 } else {
>                         adev->pm.dpm.forced_level = level;
>                 }
> @@ -461,9 +441,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
>         pm_runtime_mark_last_busy(ddev->dev);
>         pm_runtime_put_autosuspend(ddev->dev);
>
> -pro_end:
> -       up_read(&adev->reset_sem);
> -       return ret;
> +       return count;
>  }
>
>  static ssize_t amdgpu_get_pp_num_states(struct device *dev,
> @@ -475,7 +453,7 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,
>         struct pp_states_info data;
>         int i, buf_len, ret;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         ret = pm_runtime_get_sync(ddev->dev);
> @@ -519,7 +497,7 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
>         enum amd_pm_state_type pm = 0;
>         int i = 0, ret = 0;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         ret = pm_runtime_get_sync(ddev->dev);
> @@ -560,7 +538,7 @@ static ssize_t amdgpu_get_pp_force_state(struct device *dev,
>         struct drm_device *ddev = dev_get_drvdata(dev);
>         struct amdgpu_device *adev = ddev->dev_private;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         if (adev->pp_force_state_enabled)
> @@ -580,7 +558,7 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
>         unsigned long idx;
>         int ret;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         if (strlen(buf) == 1)
> @@ -606,7 +584,6 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
>                         return ret;
>                 }
>
> -               down_read(&adev->reset_sem);
>                 /* only set user selected power states */
>                 if (state != POWER_STATE_TYPE_INTERNAL_BOOT &&
>                     state != POWER_STATE_TYPE_DEFAULT) {
> @@ -614,8 +591,6 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
>                                         AMD_PP_TASK_ENABLE_USER_STATE, &state);
>                         adev->pp_force_state_enabled = true;
>                 }
> -               up_read(&adev->reset_sem);
> -
>                 pm_runtime_mark_last_busy(ddev->dev);
>                 pm_runtime_put_autosuspend(ddev->dev);
>         }
> @@ -643,7 +618,7 @@ static ssize_t amdgpu_get_pp_table(struct device *dev,
>         char *table = NULL;
>         int size, ret;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         ret = pm_runtime_get_sync(ddev->dev);
> @@ -687,7 +662,7 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
>         struct amdgpu_device *adev = ddev->dev_private;
>         int ret = 0;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         ret = pm_runtime_get_sync(ddev->dev);
> @@ -696,21 +671,16 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
>                 return ret;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev)) {
>                 ret = smu_sys_set_pp_table(&adev->smu, (void *)buf, count);
>                 if (ret) {
>                         pm_runtime_mark_last_busy(ddev->dev);
>                         pm_runtime_put_autosuspend(ddev->dev);
> -                       up_read(&adev->reset_sem);
>                         return ret;
>                 }
>         } else if (adev->powerplay.pp_funcs->set_pp_table)
>                 amdgpu_dpm_set_pp_table(adev, buf, count);
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(ddev->dev);
>         pm_runtime_put_autosuspend(ddev->dev);
>
> @@ -845,7 +815,7 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
>         const char delimiter[3] = {' ', '\n', '\0'};
>         uint32_t type;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         if (count > 127)
> @@ -889,10 +859,6 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
>                 return ret;
>         }
>
> -       ret = count;
> -
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev)) {
>                 ret = smu_od_edit_dpm_table(&adev->smu, type,
>                                             parameter, parameter_size);
> @@ -900,8 +866,7 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
>                 if (ret) {
>                         pm_runtime_mark_last_busy(ddev->dev);
>                         pm_runtime_put_autosuspend(ddev->dev);
> -                       ret = -EINVAL;
> -                       goto pro_end;
> +                       return -EINVAL;
>                 }
>         } else {
>                 if (adev->powerplay.pp_funcs->odn_edit_dpm_table) {
> @@ -910,8 +875,7 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
>                         if (ret) {
>                                 pm_runtime_mark_last_busy(ddev->dev);
>                                 pm_runtime_put_autosuspend(ddev->dev);
> -                               ret = -EINVAL;
> -                               goto pro_end;
> +                               return -EINVAL;
>                         }
>                 }
>
> @@ -922,22 +886,18 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
>                                                 NULL);
>                                 pm_runtime_mark_last_busy(ddev->dev);
>                                 pm_runtime_put_autosuspend(ddev->dev);
> -                               ret = count;
> -                               goto pro_end;
> +                               return count;
>                         } else {
>                                 pm_runtime_mark_last_busy(ddev->dev);
>                                 pm_runtime_put_autosuspend(ddev->dev);
> -                               ret = -EINVAL;
> -                               goto pro_end;
> +                               return -EINVAL;
>                         }
>                 }
>         }
>         pm_runtime_mark_last_busy(ddev->dev);
>         pm_runtime_put_autosuspend(ddev->dev);
>
> -pro_end:
> -       up_read(&adev->reset_sem);
> -       return ret;
> +       return count;
>  }
>
>  static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
> @@ -949,7 +909,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
>         ssize_t size;
>         int ret;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         ret = pm_runtime_get_sync(ddev->dev);
> @@ -1003,7 +963,7 @@ static ssize_t amdgpu_set_pp_features(struct device *dev,
>         uint64_t featuremask;
>         int ret;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         ret = kstrtou64(buf, 0, &featuremask);
> @@ -1018,13 +978,11 @@ static ssize_t amdgpu_set_pp_features(struct device *dev,
>                 return ret;
>         }
>
> -       down_read(&adev->reset_sem);
>         if (is_support_sw_smu(adev)) {
>                 ret = smu_sys_set_pp_feature_mask(&adev->smu, featuremask);
>                 if (ret) {
>                         pm_runtime_mark_last_busy(ddev->dev);
>                         pm_runtime_put_autosuspend(ddev->dev);
> -                       up_read(&adev->reset_sem);
>                         return -EINVAL;
>                 }
>         } else if (adev->powerplay.pp_funcs->set_ppfeature_status) {
> @@ -1032,12 +990,9 @@ static ssize_t amdgpu_set_pp_features(struct device *dev,
>                 if (ret) {
>                         pm_runtime_mark_last_busy(ddev->dev);
>                         pm_runtime_put_autosuspend(ddev->dev);
> -                       up_read(&adev->reset_sem);
>                         return -EINVAL;
>                 }
>         }
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(ddev->dev);
>         pm_runtime_put_autosuspend(ddev->dev);
>
> @@ -1053,7 +1008,7 @@ static ssize_t amdgpu_get_pp_features(struct device *dev,
>         ssize_t size;
>         int ret;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         ret = pm_runtime_get_sync(ddev->dev);
> @@ -1062,8 +1017,6 @@ static ssize_t amdgpu_get_pp_features(struct device *dev,
>                 return ret;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev))
>                 size = smu_sys_get_pp_feature_mask(&adev->smu, buf);
>         else if (adev->powerplay.pp_funcs->get_ppfeature_status)
> @@ -1071,8 +1024,6 @@ static ssize_t amdgpu_get_pp_features(struct device *dev,
>         else
>                 size = snprintf(buf, PAGE_SIZE, "\n");
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(ddev->dev);
>         pm_runtime_put_autosuspend(ddev->dev);
>
> @@ -1118,7 +1069,7 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
>         ssize_t size;
>         int ret;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         ret = pm_runtime_get_sync(ddev->dev);
> @@ -1127,8 +1078,6 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
>                 return ret;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev))
>                 size = smu_print_clk_levels(&adev->smu, SMU_SCLK, buf);
>         else if (adev->powerplay.pp_funcs->print_clock_levels)
> @@ -1136,8 +1085,6 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
>         else
>                 size = snprintf(buf, PAGE_SIZE, "\n");
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(ddev->dev);
>         pm_runtime_put_autosuspend(ddev->dev);
>
> @@ -1190,7 +1137,7 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
>         int ret;
>         uint32_t mask = 0;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         ret = amdgpu_read_mask(buf, count, &mask);
> @@ -1203,15 +1150,11 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
>                 return ret;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev))
>                 ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask);
>         else if (adev->powerplay.pp_funcs->force_clock_level)
>                 ret = amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(ddev->dev);
>         pm_runtime_put_autosuspend(ddev->dev);
>
> @@ -1230,7 +1173,7 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,
>         ssize_t size;
>         int ret;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         ret = pm_runtime_get_sync(ddev->dev);
> @@ -1239,8 +1182,6 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,
>                 return ret;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev))
>                 size = smu_print_clk_levels(&adev->smu, SMU_MCLK, buf);
>         else if (adev->powerplay.pp_funcs->print_clock_levels)
> @@ -1248,8 +1189,6 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,
>         else
>                 size = snprintf(buf, PAGE_SIZE, "\n");
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(ddev->dev);
>         pm_runtime_put_autosuspend(ddev->dev);
>
> @@ -1266,7 +1205,7 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
>         uint32_t mask = 0;
>         int ret;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         ret = amdgpu_read_mask(buf, count, &mask);
> @@ -1279,15 +1218,11 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
>                 return ret;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev))
>                 ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask);
>         else if (adev->powerplay.pp_funcs->force_clock_level)
>                 ret = amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(ddev->dev);
>         pm_runtime_put_autosuspend(ddev->dev);
>
> @@ -1306,7 +1241,7 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev,
>         ssize_t size;
>         int ret;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         ret = pm_runtime_get_sync(ddev->dev);
> @@ -1315,8 +1250,6 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev,
>                 return ret;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev))
>                 size = smu_print_clk_levels(&adev->smu, SMU_SOCCLK, buf);
>         else if (adev->powerplay.pp_funcs->print_clock_levels)
> @@ -1324,8 +1257,6 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev,
>         else
>                 size = snprintf(buf, PAGE_SIZE, "\n");
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(ddev->dev);
>         pm_runtime_put_autosuspend(ddev->dev);
>
> @@ -1342,7 +1273,7 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev,
>         int ret;
>         uint32_t mask = 0;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         ret = amdgpu_read_mask(buf, count, &mask);
> @@ -1355,8 +1286,6 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev,
>                 return ret;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev))
>                 ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask);
>         else if (adev->powerplay.pp_funcs->force_clock_level)
> @@ -1364,8 +1293,6 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev,
>         else
>                 ret = 0;
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(ddev->dev);
>         pm_runtime_put_autosuspend(ddev->dev);
>
> @@ -1384,7 +1311,7 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev,
>         ssize_t size;
>         int ret;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         ret = pm_runtime_get_sync(ddev->dev);
> @@ -1393,8 +1320,6 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev,
>                 return ret;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev))
>                 size = smu_print_clk_levels(&adev->smu, SMU_FCLK, buf);
>         else if (adev->powerplay.pp_funcs->print_clock_levels)
> @@ -1402,8 +1327,6 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev,
>         else
>                 size = snprintf(buf, PAGE_SIZE, "\n");
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(ddev->dev);
>         pm_runtime_put_autosuspend(ddev->dev);
>
> @@ -1420,7 +1343,7 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev,
>         int ret;
>         uint32_t mask = 0;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         ret = amdgpu_read_mask(buf, count, &mask);
> @@ -1433,8 +1356,6 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev,
>                 return ret;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev))
>                 ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask);
>         else if (adev->powerplay.pp_funcs->force_clock_level)
> @@ -1442,8 +1363,6 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev,
>         else
>                 ret = 0;
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(ddev->dev);
>         pm_runtime_put_autosuspend(ddev->dev);
>
> @@ -1462,7 +1381,7 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev,
>         ssize_t size;
>         int ret;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         ret = pm_runtime_get_sync(ddev->dev);
> @@ -1471,8 +1390,6 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev,
>                 return ret;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev))
>                 size = smu_print_clk_levels(&adev->smu, SMU_DCEFCLK, buf);
>         else if (adev->powerplay.pp_funcs->print_clock_levels)
> @@ -1480,8 +1397,6 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev,
>         else
>                 size = snprintf(buf, PAGE_SIZE, "\n");
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(ddev->dev);
>         pm_runtime_put_autosuspend(ddev->dev);
>
> @@ -1498,7 +1413,7 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev,
>         int ret;
>         uint32_t mask = 0;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         ret = amdgpu_read_mask(buf, count, &mask);
> @@ -1511,8 +1426,6 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev,
>                 return ret;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev))
>                 ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask);
>         else if (adev->powerplay.pp_funcs->force_clock_level)
> @@ -1520,8 +1433,6 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev,
>         else
>                 ret = 0;
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(ddev->dev);
>         pm_runtime_put_autosuspend(ddev->dev);
>
> @@ -1540,7 +1451,7 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
>         ssize_t size;
>         int ret;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         ret = pm_runtime_get_sync(ddev->dev);
> @@ -1549,8 +1460,6 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
>                 return ret;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev))
>                 size = smu_print_clk_levels(&adev->smu, SMU_PCIE, buf);
>         else if (adev->powerplay.pp_funcs->print_clock_levels)
> @@ -1558,8 +1467,6 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
>         else
>                 size = snprintf(buf, PAGE_SIZE, "\n");
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(ddev->dev);
>         pm_runtime_put_autosuspend(ddev->dev);
>
> @@ -1576,7 +1483,7 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
>         int ret;
>         uint32_t mask = 0;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         ret = amdgpu_read_mask(buf, count, &mask);
> @@ -1589,8 +1496,6 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
>                 return ret;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev))
>                 ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask);
>         else if (adev->powerplay.pp_funcs->force_clock_level)
> @@ -1598,8 +1503,6 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
>         else
>                 ret = 0;
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(ddev->dev);
>         pm_runtime_put_autosuspend(ddev->dev);
>
> @@ -1618,7 +1521,7 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
>         uint32_t value = 0;
>         int ret;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         ret = pm_runtime_get_sync(ddev->dev);
> @@ -1627,15 +1530,11 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
>                 return ret;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev))
>                 value = smu_get_od_percentage(&(adev->smu), SMU_OD_SCLK);
>         else if (adev->powerplay.pp_funcs->get_sclk_od)
>                 value = amdgpu_dpm_get_sclk_od(adev);
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(ddev->dev);
>         pm_runtime_put_autosuspend(ddev->dev);
>
> @@ -1652,7 +1551,7 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
>         int ret;
>         long int value;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         ret = kstrtol(buf, 0, &value);
> @@ -1666,8 +1565,6 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
>                 return ret;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev)) {
>                 value = smu_set_od_percentage(&(adev->smu), SMU_OD_SCLK, (uint32_t)value);
>         } else {
> @@ -1682,8 +1579,6 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
>                 }
>         }
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(ddev->dev);
>         pm_runtime_put_autosuspend(ddev->dev);
>
> @@ -1699,7 +1594,7 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
>         uint32_t value = 0;
>         int ret;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         ret = pm_runtime_get_sync(ddev->dev);
> @@ -1708,15 +1603,11 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
>                 return ret;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev))
>                 value = smu_get_od_percentage(&(adev->smu), SMU_OD_MCLK);
>         else if (adev->powerplay.pp_funcs->get_mclk_od)
>                 value = amdgpu_dpm_get_mclk_od(adev);
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(ddev->dev);
>         pm_runtime_put_autosuspend(ddev->dev);
>
> @@ -1733,7 +1624,7 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
>         int ret;
>         long int value;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         ret = kstrtol(buf, 0, &value);
> @@ -1747,8 +1638,6 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
>                 return ret;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev)) {
>                 value = smu_set_od_percentage(&(adev->smu), SMU_OD_MCLK, (uint32_t)value);
>         } else {
> @@ -1763,8 +1652,6 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
>                 }
>         }
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(ddev->dev);
>         pm_runtime_put_autosuspend(ddev->dev);
>
> @@ -1800,7 +1687,7 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
>         ssize_t size;
>         int ret;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         ret = pm_runtime_get_sync(ddev->dev);
> @@ -1809,8 +1696,6 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
>                 return ret;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev))
>                 size = smu_get_power_profile_mode(&adev->smu, buf);
>         else if (adev->powerplay.pp_funcs->get_power_profile_mode)
> @@ -1818,8 +1703,6 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
>         else
>                 size = snprintf(buf, PAGE_SIZE, "\n");
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(ddev->dev);
>         pm_runtime_put_autosuspend(ddev->dev);
>
> @@ -1844,7 +1727,7 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
>         long int profile_mode = 0;
>         const char delimiter[3] = {' ', '\n', '\0'};
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         tmp[0] = *(buf);
> @@ -1878,15 +1761,11 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
>                 return ret;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev))
>                 ret = smu_set_power_profile_mode(&adev->smu, parameter, parameter_size, true);
>         else if (adev->powerplay.pp_funcs->set_power_profile_mode)
>                 ret = amdgpu_dpm_set_power_profile_mode(adev, parameter, parameter_size);
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(ddev->dev);
>         pm_runtime_put_autosuspend(ddev->dev);
>
> @@ -1912,7 +1791,7 @@ static ssize_t amdgpu_get_gpu_busy_percent(struct device *dev,
>         struct amdgpu_device *adev = ddev->dev_private;
>         int r, value, size = sizeof(value);
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         r = pm_runtime_get_sync(ddev->dev);
> @@ -1921,11 +1800,9 @@ static ssize_t amdgpu_get_gpu_busy_percent(struct device *dev,
>                 return r;
>         }
>
> -       down_read(&adev->reset_sem);
>         /* read the IP busy sensor */
>         r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD,
>                                    (void *)&value, &size);
> -       up_read(&adev->reset_sem);
>
>         pm_runtime_mark_last_busy(ddev->dev);
>         pm_runtime_put_autosuspend(ddev->dev);
> @@ -1952,7 +1829,7 @@ static ssize_t amdgpu_get_mem_busy_percent(struct device *dev,
>         struct amdgpu_device *adev = ddev->dev_private;
>         int r, value, size = sizeof(value);
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         r = pm_runtime_get_sync(ddev->dev);
> @@ -1961,14 +1838,10 @@ static ssize_t amdgpu_get_mem_busy_percent(struct device *dev,
>                 return r;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         /* read the IP busy sensor */
>         r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD,
>                                    (void *)&value, &size);
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(ddev->dev);
>         pm_runtime_put_autosuspend(ddev->dev);
>
> @@ -1999,7 +1872,7 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
>         uint64_t count0 = 0, count1 = 0;
>         int ret;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         if (adev->flags & AMD_IS_APU)
> @@ -2014,12 +1887,8 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
>                 return ret;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         amdgpu_asic_get_pcie_usage(adev, &count0, &count1);
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(ddev->dev);
>         pm_runtime_put_autosuspend(ddev->dev);
>
> @@ -2044,7 +1913,7 @@ static ssize_t amdgpu_get_unique_id(struct device *dev,
>         struct drm_device *ddev = dev_get_drvdata(dev);
>         struct amdgpu_device *adev = ddev->dev_private;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         if (adev->unique_id)
> @@ -2142,7 +2011,7 @@ static ssize_t amdgpu_get_gpu_metrics(struct device *dev,
>         ssize_t size = 0;
>         int ret;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         ret = pm_runtime_get_sync(ddev->dev);
> @@ -2151,12 +2020,10 @@ static ssize_t amdgpu_get_gpu_metrics(struct device *dev,
>                 return ret;
>         }
>
> -       down_read(&adev->reset_sem);
>         if (is_support_sw_smu(adev))
>                 size = smu_sys_get_gpu_metrics(&adev->smu, &gpu_metrics);
>         else if (adev->powerplay.pp_funcs->get_gpu_metrics)
>                 size = amdgpu_dpm_get_gpu_metrics(adev, &gpu_metrics);
> -       up_read(&adev->reset_sem);
>
>         if (size <= 0)
>                 goto out;
> @@ -2368,7 +2235,7 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
>         int channel = to_sensor_dev_attr(attr)->index;
>         int r, temp = 0, size = sizeof(temp);
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         if (channel >= PP_TEMP_MAX)
> @@ -2380,8 +2247,6 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
>                 return r;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         switch (channel) {
>         case PP_TEMP_JUNCTION:
>                 /* get current junction temperature */
> @@ -2403,8 +2268,6 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
>                 break;
>         }
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> @@ -2508,7 +2371,7 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
>         u32 pwm_mode = 0;
>         int ret;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         ret = pm_runtime_get_sync(adev->ddev->dev);
> @@ -2517,23 +2380,18 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
>                 return ret;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev)) {
>                 pwm_mode = smu_get_fan_control_mode(&adev->smu);
>         } else {
>                 if (!adev->powerplay.pp_funcs->get_fan_control_mode) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> -                       up_read(&adev->reset_sem);
>                         return -EINVAL;
>                 }
>
>                 pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
>         }
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> @@ -2549,7 +2407,7 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
>         int err, ret;
>         int value;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         err = kstrtoint(buf, 10, &value);
> @@ -2562,23 +2420,18 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
>                 return ret;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev)) {
>                 smu_set_fan_control_mode(&adev->smu, value);
>         } else {
>                 if (!adev->powerplay.pp_funcs->set_fan_control_mode) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> -                       up_read(&adev->reset_sem);
>                         return -EINVAL;
>                 }
>
>                 amdgpu_dpm_set_fan_control_mode(adev, value);
>         }
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> @@ -2608,7 +2461,7 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
>         u32 value;
>         u32 pwm_mode;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         err = pm_runtime_get_sync(adev->ddev->dev);
> @@ -2617,15 +2470,11 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
>                 return err;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev))
>                 pwm_mode = smu_get_fan_control_mode(&adev->smu);
>         else
>                 pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
>
> -       up_read(&adev->reset_sem);
> -
>         if (pwm_mode != AMD_FAN_CTRL_MANUAL) {
>                 pr_info("manual fan speed control should be enabled first\n");
>                 pm_runtime_mark_last_busy(adev->ddev->dev);
> @@ -2666,7 +2515,7 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
>         int err;
>         u32 speed = 0;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         err = pm_runtime_get_sync(adev->ddev->dev);
> @@ -2675,8 +2524,6 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
>                 return err;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev))
>                 err = smu_get_fan_speed_percent(&adev->smu, &speed);
>         else if (adev->powerplay.pp_funcs->get_fan_speed_percent)
> @@ -2684,8 +2531,6 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
>         else
>                 err = -EINVAL;
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> @@ -2705,7 +2550,7 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
>         int err;
>         u32 speed = 0;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         err = pm_runtime_get_sync(adev->ddev->dev);
> @@ -2714,8 +2559,6 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
>                 return err;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev))
>                 err = smu_get_fan_speed_rpm(&adev->smu, &speed);
>         else if (adev->powerplay.pp_funcs->get_fan_speed_rpm)
> @@ -2723,8 +2566,6 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
>         else
>                 err = -EINVAL;
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> @@ -2743,7 +2584,7 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev,
>         u32 size = sizeof(min_rpm);
>         int r;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         r = pm_runtime_get_sync(adev->ddev->dev);
> @@ -2752,13 +2593,9 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev,
>                 return r;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MIN_FAN_RPM,
>                                    (void *)&min_rpm, &size);
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> @@ -2777,7 +2614,7 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev,
>         u32 size = sizeof(max_rpm);
>         int r;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         r = pm_runtime_get_sync(adev->ddev->dev);
> @@ -2786,13 +2623,9 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev,
>                 return r;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MAX_FAN_RPM,
>                                    (void *)&max_rpm, &size);
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> @@ -2810,7 +2643,7 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
>         int err;
>         u32 rpm = 0;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         err = pm_runtime_get_sync(adev->ddev->dev);
> @@ -2819,8 +2652,6 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
>                 return err;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev))
>                 err = smu_get_fan_speed_rpm(&adev->smu, &rpm);
>         else if (adev->powerplay.pp_funcs->get_fan_speed_rpm)
> @@ -2828,8 +2659,6 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
>         else
>                 err = -EINVAL;
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> @@ -2848,7 +2677,7 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
>         u32 value;
>         u32 pwm_mode;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         err = pm_runtime_get_sync(adev->ddev->dev);
> @@ -2857,15 +2686,11 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
>                 return err;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev))
>                 pwm_mode = smu_get_fan_control_mode(&adev->smu);
>         else
>                 pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
>
> -       up_read(&adev->reset_sem);
> -
>         if (pwm_mode != AMD_FAN_CTRL_MANUAL) {
>                 pm_runtime_mark_last_busy(adev->ddev->dev);
>                 pm_runtime_put_autosuspend(adev->ddev->dev);
> @@ -2879,8 +2704,6 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
>                 return err;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev))
>                 err = smu_set_fan_speed_rpm(&adev->smu, value);
>         else if (adev->powerplay.pp_funcs->set_fan_speed_rpm)
> @@ -2888,8 +2711,6 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
>         else
>                 err = -EINVAL;
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> @@ -2907,7 +2728,7 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev,
>         u32 pwm_mode = 0;
>         int ret;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         ret = pm_runtime_get_sync(adev->ddev->dev);
> @@ -2916,23 +2737,18 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev,
>                 return ret;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev)) {
>                 pwm_mode = smu_get_fan_control_mode(&adev->smu);
>         } else {
>                 if (!adev->powerplay.pp_funcs->get_fan_control_mode) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> -                       up_read(&adev->reset_sem);
>                         return -EINVAL;
>                 }
>
>                 pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
>         }
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> @@ -2949,7 +2765,7 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
>         int value;
>         u32 pwm_mode;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         err = kstrtoint(buf, 10, &value);
> @@ -2969,22 +2785,17 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
>                 return err;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev)) {
>                 smu_set_fan_control_mode(&adev->smu, pwm_mode);
>         } else {
>                 if (!adev->powerplay.pp_funcs->set_fan_control_mode) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> -                       up_read(&adev->reset_sem);
>                         return -EINVAL;
>                 }
>                 amdgpu_dpm_set_fan_control_mode(adev, pwm_mode);
>         }
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> @@ -2999,7 +2810,7 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev,
>         u32 vddgfx;
>         int r, size = sizeof(vddgfx);
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         r = pm_runtime_get_sync(adev->ddev->dev);
> @@ -3008,11 +2819,9 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev,
>                 return r;
>         }
>
> -       down_read(&adev->reset_sem);
>         /* get the voltage */
>         r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX,
>                                    (void *)&vddgfx, &size);
> -       up_read(&adev->reset_sem);
>
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
> @@ -3038,7 +2847,7 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev,
>         u32 vddnb;
>         int r, size = sizeof(vddnb);
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         /* only APUs have vddnb */
> @@ -3051,11 +2860,9 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev,
>                 return r;
>         }
>
> -       down_read(&adev->reset_sem);
>         /* get the voltage */
>         r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB,
>                                    (void *)&vddnb, &size);
> -       up_read(&adev->reset_sem);
>
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
> @@ -3082,7 +2889,7 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
>         int r, size = sizeof(u32);
>         unsigned uw;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         r = pm_runtime_get_sync(adev->ddev->dev);
> @@ -3091,11 +2898,9 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
>                 return r;
>         }
>
> -       down_read(&adev->reset_sem);
>         /* get the voltage */
>         r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER,
>                                    (void *)&query, &size);
> -       up_read(&adev->reset_sem);
>
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
> @@ -3125,7 +2930,7 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
>         ssize_t size;
>         int r;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         r = pm_runtime_get_sync(adev->ddev->dev);
> @@ -3134,8 +2939,6 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
>                 return r;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev)) {
>                 smu_get_power_limit(&adev->smu, &limit, true);
>                 size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
> @@ -3146,8 +2949,6 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
>                 size = snprintf(buf, PAGE_SIZE, "\n");
>         }
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> @@ -3163,7 +2964,7 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
>         ssize_t size;
>         int r;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         r = pm_runtime_get_sync(adev->ddev->dev);
> @@ -3172,8 +2973,6 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
>                 return r;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev)) {
>                 smu_get_power_limit(&adev->smu, &limit, false);
>                 size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
> @@ -3184,8 +2983,6 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
>                 size = snprintf(buf, PAGE_SIZE, "\n");
>         }
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> @@ -3202,7 +2999,7 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
>         int err;
>         u32 value;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         if (amdgpu_sriov_vf(adev))
> @@ -3221,8 +3018,6 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
>                 return err;
>         }
>
> -       down_read(&adev->reset_sem);
> -
>         if (is_support_sw_smu(adev))
>                 err = smu_set_power_limit(&adev->smu, value);
>         else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit)
> @@ -3230,8 +3025,6 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
>         else
>                 err = -EINVAL;
>
> -       up_read(&adev->reset_sem);
> -
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> @@ -3249,7 +3042,7 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev,
>         uint32_t sclk;
>         int r, size = sizeof(sclk);
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         r = pm_runtime_get_sync(adev->ddev->dev);
> @@ -3258,11 +3051,9 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev,
>                 return r;
>         }
>
> -       down_read(&adev->reset_sem);
>         /* get the sclk */
>         r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK,
>                                    (void *)&sclk, &size);
> -       up_read(&adev->reset_sem);
>
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
> @@ -3288,7 +3079,7 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev,
>         uint32_t mclk;
>         int r, size = sizeof(mclk);
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         r = pm_runtime_get_sync(adev->ddev->dev);
> @@ -3297,11 +3088,9 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev,
>                 return r;
>         }
>
> -       down_read(&adev->reset_sem);
>         /* get the sclk */
>         r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK,
>                                    (void *)&mclk, &size);
> -       up_read(&adev->reset_sem);
>
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
> @@ -4188,7 +3977,7 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data)
>         u32 flags = 0;
>         int r;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EPERM;
>
>         r = pm_runtime_get_sync(dev->dev);
> @@ -4204,7 +3993,6 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data)
>                 return 0;
>         }
>
> -       down_read(&adev->reset_sem);
>         if (!is_support_sw_smu(adev) &&
>             adev->powerplay.pp_funcs->debugfs_print_current_performance_level) {
>                 mutex_lock(&adev->pm.mutex);
> @@ -4217,13 +4005,10 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data)
>         } else {
>                 r = amdgpu_debugfs_pm_info_pp(m, adev);
>         }
> -       up_read(&adev->reset_sem);
>         if (r)
>                 goto out;
>
> -       down_read(&adev->reset_sem);
>         amdgpu_device_ip_get_clockgating_state(adev, &flags);
> -       up_read(&adev->reset_sem);
>
>         seq_printf(m, "Clock Gating Flags Mask: 0x%x\n", flags);
>         amdgpu_parse_cg_state(m, flags);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> index 116a89990f39..aa1e77c60c0a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> @@ -1869,7 +1869,7 @@ static int psp_load_smu_fw(struct psp_context *psp)
>                 return 0;
>
>
> -       if (amdgpu_in_reset(adev) && ras && ras->supported) {
> +       if (adev->in_gpu_reset && ras && ras->supported) {
>                 ret = amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_UNLOAD);
>                 if (ret) {
>                         DRM_WARN("Failed to set MP1 state prepare for reload\n");
> @@ -1984,7 +1984,7 @@ static int psp_load_fw(struct amdgpu_device *adev)
>         int ret;
>         struct psp_context *psp = &adev->psp;
>
> -       if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) {
> +       if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset) {
>                 psp_ring_stop(psp, PSP_RING_TYPE__KM); /* should not destroy ring, only stop */
>                 goto skip_memalloc;
>         }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index cd1403f83dcf..f09082578865 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -2079,7 +2079,7 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev,
>                         amdgpu_ras_request_reset_on_boot(adev,
>                                         ras_block->block);
>                         return 0;
> -               } else if (adev->in_suspend || amdgpu_in_reset(adev)) {
> +               } else if (adev->in_suspend || adev->in_gpu_reset) {
>                         /* in resume phase, if fail to enable ras,
>                          * clean up all ras fs nodes, and disable ras */
>                         goto cleanup;
> @@ -2088,7 +2088,7 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev,
>         }
>
>         /* in resume phase, no need to create ras fs node */
> -       if (adev->in_suspend || amdgpu_in_reset(adev))
> +       if (adev->in_suspend || adev->in_gpu_reset)
>                 return 0;
>
>         if (ih_info->cb) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 20fa0497aaa4..1e19d130473f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -2103,7 +2103,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
>         uint64_t size;
>         int r;
>
> -       if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
> +       if (!adev->mman.initialized || adev->in_gpu_reset ||
>             adev->mman.buffer_funcs_enabled == enable)
>                 return;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
> index 039245c98ff8..183743c5fb7b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
> @@ -628,8 +628,7 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
>         struct amdgpu_firmware_info *ucode = NULL;
>
>   /* for baremetal, the ucode is allocated in gtt, so don't need to fill the bo when reset/suspend */
> -       if (!amdgpu_sriov_vf(adev) &&
> -               (amdgpu_in_reset(adev) || adev->in_suspend))
> +       if (!amdgpu_sriov_vf(adev) && (adev->in_gpu_reset || adev->in_suspend))
>                 return 0;
>         /*
>          * if SMU loaded firmware, it needn't add SMC, UVD, and VCE
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index 1e211544f2dc..ae720a6dc5a0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -93,7 +93,7 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
>         amdgpu_ring_undo(ring);
>         spin_unlock_irqrestore(&kiq->ring_lock, flags);
>  failed_kiq:
> -       dev_warn(adev->dev, "failed to write reg %x wait reg %x\n", reg0, reg1);
> +       pr_err("failed to write reg %x wait reg %x\n", reg0, reg1);
>  }
>
>  /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index b2046c3a404d..f826945989c7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -325,9 +325,9 @@ static inline bool is_virtual_machine(void)
>  #define amdgpu_sriov_is_pp_one_vf(adev) \
>         ((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF)
>  #define amdgpu_sriov_is_debug(adev) \
> -       ((!amdgpu_in_reset(adev)) && adev->virt.tdr_debug)
> +       ((!adev->in_gpu_reset) && adev->virt.tdr_debug)
>  #define amdgpu_sriov_is_normal(adev) \
> -       ((!amdgpu_in_reset(adev)) && (!adev->virt.tdr_debug))
> +       ((!adev->in_gpu_reset) && (!adev->virt.tdr_debug))
>
>  bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
>  void amdgpu_virt_init_setting(struct amdgpu_device *adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> index 67a756f4337b..cd6e6eb7d966 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> @@ -372,7 +372,7 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lo
>         tmp->hive_id = adev->gmc.xgmi.hive_id;
>         INIT_LIST_HEAD(&tmp->device_list);
>         mutex_init(&tmp->hive_lock);
> -       atomic_set(&tmp->in_reset, 0);
> +       mutex_init(&tmp->reset_lock);
>         task_barrier_init(&tmp->tb);
>
>         if (lock)
> @@ -397,7 +397,6 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
>                                                 hive->hi_req_gpu : adev;
>         bool is_hi_req = pstate == AMDGPU_XGMI_PSTATE_MAX_VEGA20;
>         bool init_low = hive->pstate == AMDGPU_XGMI_PSTATE_UNKNOWN;
> -       bool locked;
>
>         /* fw bug so temporarily disable pstate switching */
>         return 0;
> @@ -405,9 +404,7 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
>         if (!hive || adev->asic_type != CHIP_VEGA20)
>                 return 0;
>
> -       locked = atomic_read(&hive->in_reset) ? false : true;
> -       if (locked)
> -               mutex_lock(&hive->hive_lock);
> +       mutex_lock(&hive->hive_lock);
>
>         if (is_hi_req)
>                 hive->hi_req_count++;
> @@ -442,8 +439,7 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
>                                                         adev : NULL;
>         }
>  out:
> -       if (locked)
> -               mutex_unlock(&hive->hive_lock);
> +       mutex_unlock(&hive->hive_lock);
>         return ret;
>  }
>
> @@ -598,6 +594,7 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
>         if(!(--hive->number_devices)){
>                 amdgpu_xgmi_sysfs_destroy(adev, hive);
>                 mutex_destroy(&hive->hive_lock);
> +               mutex_destroy(&hive->reset_lock);
>         }
>
>         return psp_xgmi_terminate(&adev->psp);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> index 61720cd4a1ee..6999eab16a72 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> @@ -30,8 +30,7 @@ struct amdgpu_hive_info {
>         uint64_t                hive_id;
>         struct list_head        device_list;
>         int number_devices;
> -       struct mutex hive_lock;
> -       atomic_t in_reset;
> +       struct mutex hive_lock, reset_lock;
>         struct kobject *kobj;
>         struct device_attribute dev_attr;
>         struct amdgpu_device *adev;
> diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c
> index 8341bd965202..4cfc786699c7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/atom.c
> +++ b/drivers/gpu/drm/amd/amdgpu/atom.c
> @@ -755,7 +755,6 @@ static void atom_op_jump(atom_exec_context *ctx, int *ptr, int arg)
>                                 /* jiffies wrap around we will just wait a little longer */
>                                 ctx->last_jump_jiffies = jiffies;
>                         }
> -                       schedule();
>                 } else {
>                         ctx->last_jump = ctx->start + target;
>                         ctx->last_jump_jiffies = jiffies;
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index de6e6de41867..e87d43537013 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -6201,7 +6201,7 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
>         struct v10_gfx_mqd *mqd = ring->mqd_ptr;
>         int mqd_idx = ring - &adev->gfx.gfx_ring[0];
>
> -       if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
> +       if (!adev->in_gpu_reset && !adev->in_suspend) {
>                 memset((void *)mqd, 0, sizeof(*mqd));
>                 mutex_lock(&adev->srbm_mutex);
>                 nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
> @@ -6213,7 +6213,7 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
>                 mutex_unlock(&adev->srbm_mutex);
>                 if (adev->gfx.me.mqd_backup[mqd_idx])
>                         memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
> -       } else if (amdgpu_in_reset(adev)) {
> +       } else if (adev->in_gpu_reset) {
>                 /* reset mqd with the backup copy */
>                 if (adev->gfx.me.mqd_backup[mqd_idx])
>                         memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
> @@ -6566,7 +6566,7 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring)
>
>         gfx_v10_0_kiq_setting(ring);
>
> -       if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
> +       if (adev->in_gpu_reset) { /* for GPU_RESET case */
>                 /* reset MQD to a clean status */
>                 if (adev->gfx.mec.mqd_backup[mqd_idx])
>                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
> @@ -6602,7 +6602,7 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
>         struct v10_compute_mqd *mqd = ring->mqd_ptr;
>         int mqd_idx = ring - &adev->gfx.compute_ring[0];
>
> -       if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
> +       if (!adev->in_gpu_reset && !adev->in_suspend) {
>                 memset((void *)mqd, 0, sizeof(*mqd));
>                 mutex_lock(&adev->srbm_mutex);
>                 nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
> @@ -6612,7 +6612,7 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
>
>                 if (adev->gfx.mec.mqd_backup[mqd_idx])
>                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
> -       } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
> +       } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
>                 /* reset MQD to a clean status */
>                 if (adev->gfx.mec.mqd_backup[mqd_idx])
>                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 7df567a6656d..14fd04b699da 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -4633,7 +4633,7 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
>
>         gfx_v8_0_kiq_setting(ring);
>
> -       if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
> +       if (adev->in_gpu_reset) { /* for GPU_RESET case */
>                 /* reset MQD to a clean status */
>                 if (adev->gfx.mec.mqd_backup[mqd_idx])
>                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
> @@ -4670,7 +4670,7 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
>         struct vi_mqd *mqd = ring->mqd_ptr;
>         int mqd_idx = ring - &adev->gfx.compute_ring[0];
>
> -       if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
> +       if (!adev->in_gpu_reset && !adev->in_suspend) {
>                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
>                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
>                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
> @@ -4682,7 +4682,7 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
>
>                 if (adev->gfx.mec.mqd_backup[mqd_idx])
>                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
> -       } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
> +       } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
>                 /* reset MQD to a clean status */
>                 if (adev->gfx.mec.mqd_backup[mqd_idx])
>                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 93c63ff3b35e..2c5bb282cc01 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -3686,7 +3686,7 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
>
>         gfx_v9_0_kiq_setting(ring);
>
> -       if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
> +       if (adev->in_gpu_reset) { /* for GPU_RESET case */
>                 /* reset MQD to a clean status */
>                 if (adev->gfx.mec.mqd_backup[mqd_idx])
>                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
> @@ -3724,7 +3724,7 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
>         struct v9_mqd *mqd = ring->mqd_ptr;
>         int mqd_idx = ring - &adev->gfx.compute_ring[0];
>
> -       if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
> +       if (!adev->in_gpu_reset && !adev->in_suspend) {
>                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
>                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
>                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
> @@ -3736,7 +3736,7 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
>
>                 if (adev->gfx.mec.mqd_backup[mqd_idx])
>                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
> -       } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
> +       } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
>                 /* reset MQD to a clean status */
>                 if (adev->gfx.mec.mqd_backup[mqd_idx])
>                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
> @@ -3930,7 +3930,7 @@ static int gfx_v9_0_hw_fini(void *handle)
>         /* Use deinitialize sequence from CAIL when unbinding device from driver,
>          * otherwise KIQ is hanging when binding back
>          */
> -       if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
> +       if (!adev->in_gpu_reset && !adev->in_suspend) {
>                 mutex_lock(&adev->srbm_mutex);
>                 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
>                                 adev->gfx.kiq.ring.pipe,
> @@ -4088,7 +4088,7 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
>          *
>          * also don't wait anymore for IRQ context
>          * */
> -       if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
> +       if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
>                 goto failed_kiq_read;
>
>         might_sleep();
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 9d3b1245a339..ec8c0af39553 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -287,7 +287,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>          */
>         if (adev->gfx.kiq.ring.sched.ready &&
>             (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
> -           !amdgpu_in_reset(adev)) {
> +           !adev->in_gpu_reset) {
>
>                 struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
>                 const unsigned eng = 17;
> @@ -312,7 +312,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>
>         if (!adev->mman.buffer_funcs_enabled ||
>             !adev->ib_pool_ready ||
> -           amdgpu_in_reset(adev) ||
> +           adev->in_gpu_reset ||
>             ring->sched.ready == false) {
>                 gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0);
>                 mutex_unlock(&adev->mman.gtt_window_lock);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> index 80c146df338a..3ce5c1d2fdf2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> @@ -434,7 +434,7 @@ static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>         int vmid;
>         unsigned int tmp;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EIO;
>
>         for (vmid = 1; vmid < 16; vmid++) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> index 9ab65ca7df77..3e6615f9d39c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> @@ -635,7 +635,7 @@ static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>         int vmid;
>         unsigned int tmp;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EIO;
>
>         for (vmid = 1; vmid < 16; vmid++) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 773ee11b3d17..6a780b674018 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -501,7 +501,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>          */
>         if (adev->gfx.kiq.ring.sched.ready &&
>                         (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
> -                       !amdgpu_in_reset(adev)) {
> +                       !adev->in_gpu_reset) {
>                 uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
>                 uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
>
> @@ -596,7 +596,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>         struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>
> -       if (amdgpu_in_reset(adev))
> +       if (adev->in_gpu_reset)
>                 return -EIO;
>
>         if (ring->sched.ready) {
> @@ -633,8 +633,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>                 spin_unlock(&adev->gfx.kiq.ring_lock);
>                 r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
>                 if (r < 1) {
> -                       dev_info(adev->dev,
> -                               "wait for kiq fence error: %ld\n", r);
> +                       DRM_ERROR("wait for kiq fence error: %ld.\n", r);
>                         return -ETIME;
>                 }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> index fe31cbeccfe9..5fd67e1cc2a0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> @@ -238,16 +238,20 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
>         struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
>         struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
>         int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;
> +       int locked;
>
>         /* block amdgpu_gpu_recover till msg FLR COMPLETE received,
>          * otherwise the mailbox msg will be ruined/reseted by
>          * the VF FLR.
>          *
> -        * we can unlock the reset_sem to allow "amdgpu_job_timedout"
> +        * we can unlock the lock_reset to allow "amdgpu_job_timedout"
>          * to run gpu_recover() after FLR_NOTIFICATION_CMPL received
>          * which means host side had finished this VF's FLR.
>          */
> -       down_read(&adev->reset_sem);
> +       locked = mutex_trylock(&adev->lock_reset);
> +       if (locked)
> +               adev->in_gpu_reset = true;
> +
>         do {
>                 if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
>                         goto flr_done;
> @@ -257,7 +261,10 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
>         } while (timeout > 1);
>
>  flr_done:
> -       up_read(&adev->reset_sem);
> +       if (locked) {
> +               adev->in_gpu_reset = false;
> +               mutex_unlock(&adev->lock_reset);
> +       }
>
>         /* Trigger recovery for world switch failure if no TDR */
>         if (amdgpu_device_should_recover_gpu(adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
> index 6f55172e8337..ce2bf1fb79ed 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
> @@ -259,16 +259,20 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
>         struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
>         struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
>         int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT;
> +       int locked;
>
>         /* block amdgpu_gpu_recover till msg FLR COMPLETE received,
>          * otherwise the mailbox msg will be ruined/reseted by
>          * the VF FLR.
>          *
> -        * we can unlock the reset_sem to allow "amdgpu_job_timedout"
> +        * we can unlock the lock_reset to allow "amdgpu_job_timedout"
>          * to run gpu_recover() after FLR_NOTIFICATION_CMPL received
>          * which means host side had finished this VF's FLR.
>          */
> -       down_read(&adev->reset_sem);
> +       locked = mutex_trylock(&adev->lock_reset);
> +       if (locked)
> +               adev->in_gpu_reset = true;
> +
>         do {
>                 if (xgpu_nv_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
>                         goto flr_done;
> @@ -278,7 +282,10 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
>         } while (timeout > 1);
>
>  flr_done:
> -       up_read(&adev->reset_sem);
> +       if (locked) {
> +               adev->in_gpu_reset = false;
> +               mutex_unlock(&adev->lock_reset);
> +       }
>
>         /* Trigger recovery for world switch failure if no TDR */
>         if (amdgpu_device_should_recover_gpu(adev)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 7ad1537820b5..e0e60b0d0669 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -304,17 +304,15 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
>                                 struct qcm_process_device *qpd,
>                                 struct queue *q)
>  {
> -       if (!dqm->is_resetting) {
> -               /* On GFX v7, CP doesn't flush TC at dequeue */
> -               if (q->device->device_info->asic_family == CHIP_HAWAII)
> -                       if (flush_texture_cache_nocpsch(q->device, qpd))
> -                               pr_err("Failed to flush TC\n");
> +       /* On GFX v7, CP doesn't flush TC at dequeue */
> +       if (q->device->device_info->asic_family == CHIP_HAWAII)
> +               if (flush_texture_cache_nocpsch(q->device, qpd))
> +                       pr_err("Failed to flush TC\n");
>
> -               kfd_flush_tlb(qpd_to_pdd(qpd));
> +       kfd_flush_tlb(qpd_to_pdd(qpd));
>
> -               /* Release the vmid mapping */
> -               set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
> -       }
> +       /* Release the vmid mapping */
> +       set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
>         dqm->vmid_pasid[qpd->vmid] = 0;
>
>         qpd->vmid = 0;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 71be897d4c2a..013c2b018edc 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -1551,10 +1551,6 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
>  void kfd_flush_tlb(struct kfd_process_device *pdd)
>  {
>         struct kfd_dev *dev = pdd->dev;
> -       struct device_queue_manager *dqm = dev->dqm;
> -
> -       if (dqm->is_resetting)
> -               return;
>
>         if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
>                 /* Nothing to flush until a VMID is assigned, which
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index ff5f7f7ceec6..c4daa22904da 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -1658,7 +1658,7 @@ static int dm_suspend(void *handle)
>         struct amdgpu_display_manager *dm = &adev->dm;
>         int ret = 0;
>
> -       if (amdgpu_in_reset(adev)) {
> +       if (adev->in_gpu_reset) {
>                 mutex_lock(&dm->dc_lock);
>                 dm->cached_dc_state = dc_copy_state(dm->dc->current_state);
>
> @@ -1844,7 +1844,7 @@ static int dm_resume(void *handle)
>         struct dc_state *dc_state;
>         int i, r, j;
>
> -       if (amdgpu_in_reset(adev)) {
> +       if (adev->in_gpu_reset) {
>                 dc_state = dm->cached_dc_state;
>
>                 r = dm_dmub_hw_init(adev);
> diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> index 1ffacc712e53..c8e30d59e658 100644
> --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
> @@ -1110,7 +1110,7 @@ static int smu_disable_dpms(struct smu_context *smu)
>         struct amdgpu_device *adev = smu->adev;
>         int ret = 0;
>         bool use_baco = !smu->is_apu &&
> -               ((amdgpu_in_reset(adev) &&
> +               ((adev->in_gpu_reset &&
>                   (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)) ||
>                  ((adev->in_runpm || adev->in_hibernate) && amdgpu_asic_supports_baco(adev)));
>
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> index da84012b7fd5..c7216362b68d 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
> @@ -489,7 +489,7 @@ static int vega20_setup_asic_task(struct pp_hwmgr *hwmgr)
>  {
>         struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
>         int ret = 0;
> -       bool use_baco = (amdgpu_in_reset(adev) &&
> +       bool use_baco = (adev->in_gpu_reset &&
>                          (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)) ||
>                 (adev->in_runpm && amdgpu_asic_supports_baco(adev));
>
> --
> 2.17.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] drm/amdgpu: revert "fix system hang issue during GPU reset"
  2020-08-12 15:55 ` Alex Deucher
@ 2020-08-13 10:58   ` Christian König
  2020-08-14  1:23   ` Matt Coffin
  1 sibling, 0 replies; 4+ messages in thread
From: Christian König @ 2020-08-13 10:58 UTC (permalink / raw)
  To: Alex Deucher, Li, Dennis; +Cc: amd-gfx list

Am 12.08.20 um 17:55 schrieb Alex Deucher:
> On Wed, Aug 12, 2020 at 11:54 AM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> The whole approach wasn't thought through till the end.
>>
>> We already had a reset lock like this in the past and it caused the same problems like this one.
>>
>> Completely revert the patch for now and add individual trylock protection to the hardware access functions as necessary.
>>
>> This reverts commit edad8312cbbf9a33c86873fc4093664f150dd5c1.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
> This also broke GPU overclocking.
>
> Acked-by: Alex Deucher <alexander.deucher@amd.com>

Dennis, since we still want to fix the hardware access, I suggest splitting
this patch up into the structural changes and individual patches which
add the lock to the different places where a hardware access happens.

This way we can discuss and, if necessary, fix or revert each hardware
access individually.

Thanks,
Christian.

>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h           |   9 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c    |  40 +-
>>   .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c    |   2 +-
>>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c |   2 +-
>>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c |   2 +-
>>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |   2 +-
>>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   7 -
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        |   4 -
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c       |   4 -
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c   |  14 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |  57 ++-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |   4 -
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c       |   6 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c       |  14 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c       |   4 -
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c        | 353 ++++--------------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c       |   4 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c       |   4 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       |   2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c     |   3 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c      |   2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h      |   4 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c      |  11 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h      |   3 +-
>>   drivers/gpu/drm/amd/amdgpu/atom.c             |   1 -
>>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c        |  10 +-
>>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c         |   6 +-
>>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c         |  10 +-
>>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c        |   4 +-
>>   drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c         |   2 +-
>>   drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c         |   2 +-
>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c         |   7 +-
>>   drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c         |  13 +-
>>   drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c         |  13 +-
>>   .../drm/amd/amdkfd/kfd_device_queue_manager.c |  16 +-
>>   drivers/gpu/drm/amd/amdkfd/kfd_process.c      |   4 -
>>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |   4 +-
>>   drivers/gpu/drm/amd/powerplay/amdgpu_smu.c    |   2 +-
>>   .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c    |   2 +-
>>   39 files changed, 184 insertions(+), 469 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 1f9d97f61aa5..9c6fb38ce59d 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -952,9 +952,9 @@ struct amdgpu_device {
>>          bool                            in_suspend;
>>          bool                            in_hibernate;
>>
>> -       atomic_t                        in_gpu_reset;
>> +       bool                            in_gpu_reset;
>>          enum pp_mp1_state               mp1_state;
>> -       struct rw_semaphore     reset_sem;
>> +       struct mutex  lock_reset;
>>          struct amdgpu_doorbell_index doorbell_index;
>>
>>          struct mutex                    notifier_lock;
>> @@ -1269,9 +1269,4 @@ static inline bool amdgpu_is_tmz(struct amdgpu_device *adev)
>>          return adev->gmc.tmz_enabled;
>>   }
>>
>> -static inline bool amdgpu_in_reset(struct amdgpu_device *adev)
>> -{
>> -       return atomic_read(&adev->in_gpu_reset) ? true : false;
>> -}
>> -
>>   #endif
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>> index 9738dccb1c2c..0effc1d46824 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>> @@ -244,14 +244,11 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
>>          if (cp_mqd_gfx9)
>>                  bp.flags |= AMDGPU_GEM_CREATE_CP_MQD_GFX9;
>>
>> -       if (!down_read_trylock(&adev->reset_sem))
>> -               return -EIO;
>> -
>>          r = amdgpu_bo_create(adev, &bp, &bo);
>>          if (r) {
>>                  dev_err(adev->dev,
>>                          "failed to allocate BO for amdkfd (%d)\n", r);
>> -               goto err;
>> +               return r;
>>          }
>>
>>          /* map the buffer */
>> @@ -286,7 +283,6 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
>>
>>          amdgpu_bo_unreserve(bo);
>>
>> -       up_read(&adev->reset_sem);
>>          return 0;
>>
>>   allocate_mem_kmap_bo_failed:
>> @@ -295,25 +291,19 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
>>          amdgpu_bo_unreserve(bo);
>>   allocate_mem_reserve_bo_failed:
>>          amdgpu_bo_unref(&bo);
>> -err:
>> -       up_read(&adev->reset_sem);
>> +
>>          return r;
>>   }
>>
>>   void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
>>   {
>> -       struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
>>          struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          amdgpu_bo_reserve(bo, true);
>>          amdgpu_bo_kunmap(bo);
>>          amdgpu_bo_unpin(bo);
>>          amdgpu_bo_unreserve(bo);
>>          amdgpu_bo_unref(&(bo));
>> -
>> -       up_read(&adev->reset_sem);
>>   }
>>
>>   int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size,
>> @@ -345,14 +335,9 @@ int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size,
>>
>>   void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj)
>>   {
>> -       struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
>>          struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj;
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          amdgpu_bo_unref(&bo);
>> -
>> -       up_read(&adev->reset_sem);
>>   }
>>
>>   uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
>> @@ -626,15 +611,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
>>          /* This works for NO_HWS. TODO: need to handle without knowing VMID */
>>          job->vmid = vmid;
>>
>> -       if (!down_read_trylock(&adev->reset_sem)) {
>> -               ret = -EIO;
>> -               goto err_ib_sched;
>> -       }
>> -
>>          ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          if (ret) {
>>                  DRM_ERROR("amdgpu: failed to schedule IB.\n");
>>                  goto err_ib_sched;
>> @@ -670,9 +648,6 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
>>   {
>>          struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
>>
>> -       if (!down_read_trylock(&adev->reset_sem))
>> -               return -EIO;
>> -
>>          if (adev->family == AMDGPU_FAMILY_AI) {
>>                  int i;
>>
>> @@ -682,8 +657,6 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
>>                  amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
>>          }
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          return 0;
>>   }
>>
>> @@ -692,18 +665,11 @@ int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid)
>>          struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
>>          const uint32_t flush_type = 0;
>>          bool all_hub = false;
>> -       int ret = -EIO;
>>
>>          if (adev->family == AMDGPU_FAMILY_AI)
>>                  all_hub = true;
>>
>> -       if (down_read_trylock(&adev->reset_sem)) {
>> -               ret = amdgpu_gmc_flush_gpu_tlb_pasid(adev,
>> -                                       pasid, flush_type, all_hub);
>> -               up_read(&adev->reset_sem);
>> -       }
>> -
>> -       return ret;
>> +       return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
>>   }
>>
>>   bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
>> index b872cdb0b705..691c89705bcd 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
>> @@ -543,7 +543,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
>>          uint32_t temp;
>>          struct v10_compute_mqd *m = get_mqd(mqd);
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EIO;
>>
>>   #if 0
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
>> index 832a200bb62f..0b7e78748540 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
>> @@ -425,7 +425,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
>>          unsigned long flags, end_jiffies;
>>          int retry;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EIO;
>>
>>          acquire_queue(kgd, pipe_id, queue_id);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
>> index d0940121a6a9..ccd635b812b5 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
>> @@ -421,7 +421,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
>>          int retry;
>>          struct vi_mqd *m = get_mqd(mqd);
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EIO;
>>
>>          acquire_queue(kgd, pipe_id, queue_id);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
>> index 7e11625b419e..961424bc7a1f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
>> @@ -541,7 +541,7 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
>>          uint32_t temp;
>>          struct v9_mqd *m = get_mqd(mqd);
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EIO;
>>
>>          acquire_queue(kgd, pipe_id, queue_id);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index 0d75726bd228..7e2394b50fbf 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -1194,9 +1194,6 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>>                  return -EINVAL;
>>          }
>>
>> -       if (!down_read_trylock(&adev->reset_sem))
>> -               return -EIO;
>> -
>>          *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
>>          if (!*mem) {
>>                  ret = -ENOMEM;
>> @@ -1263,7 +1260,6 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>>          if (offset)
>>                  *offset = amdgpu_bo_mmap_offset(bo);
>>
>> -       up_read(&adev->reset_sem);
>>          return 0;
>>
>>   allocate_init_user_pages_failed:
>> @@ -1281,9 +1277,6 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>>                  sg_free_table(sg);
>>                  kfree(sg);
>>          }
>> -
>> -       up_read(&adev->reset_sem);
>> -
>>          return ret;
>>   }
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index a94b3f862fc2..ffbcaf4bfb8b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -1292,8 +1292,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>>          parser.adev = adev;
>>          parser.filp = filp;
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          r = amdgpu_cs_parser_init(&parser, data);
>>          if (r) {
>>                  DRM_ERROR("Failed to initialize parser %d!\n", r);
>> @@ -1333,8 +1331,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>>   out:
>>          amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          return r;
>>   }
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> index d85d13f7a043..8842c55d4490 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> @@ -358,8 +358,6 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
>>          if (atomic_read(&ctx->guilty))
>>                  out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          /*query ue count*/
>>          ras_counter = amdgpu_ras_query_error_count(adev, false);
>>          /*ras counter is monotonic increasing*/
>> @@ -375,8 +373,6 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
>>                  ctx->ras_counter_ce = ras_counter;
>>          }
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          mutex_unlock(&mgr->lock);
>>          return 0;
>>   }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> index 0af249a1e35b..35fed75a4397 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> @@ -101,14 +101,14 @@ static int amdgpu_debugfs_autodump_open(struct inode *inode, struct file *file)
>>
>>          file->private_data = adev;
>>
>> -       down_read(&adev->reset_sem);
>> +       mutex_lock(&adev->lock_reset);
>>          if (adev->autodump.dumping.done) {
>>                  reinit_completion(&adev->autodump.dumping);
>>                  ret = 0;
>>          } else {
>>                  ret = -EBUSY;
>>          }
>> -       up_read(&adev->reset_sem);
>> +       mutex_unlock(&adev->lock_reset);
>>
>>          return ret;
>>   }
>> @@ -127,7 +127,7 @@ static unsigned int amdgpu_debugfs_autodump_poll(struct file *file, struct poll_
>>
>>          poll_wait(file, &adev->autodump.gpu_hang, poll_table);
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return POLLIN | POLLRDNORM | POLLWRNORM;
>>
>>          return 0;
>> @@ -1242,7 +1242,7 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
>>          }
>>
>>          /* Avoid accidently unparking the sched thread during GPU reset */
>> -       down_read(&adev->reset_sem);
>> +       mutex_lock(&adev->lock_reset);
>>
>>          /* hold on the scheduler */
>>          for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
>> @@ -1269,7 +1269,7 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
>>                  kthread_unpark(ring->sched.thread);
>>          }
>>
>> -       up_read(&adev->reset_sem);
>> +       mutex_unlock(&adev->lock_reset);
>>
>>          pm_runtime_mark_last_busy(dev->dev);
>>          pm_runtime_put_autosuspend(dev->dev);
>> @@ -1459,7 +1459,7 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
>>                  return -ENOMEM;
>>
>>          /* Avoid accidently unparking the sched thread during GPU reset */
>> -       down_read(&adev->reset_sem);
>> +       mutex_lock(&adev->lock_reset);
>>
>>          /* stop the scheduler */
>>          kthread_park(ring->sched.thread);
>> @@ -1500,7 +1500,7 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
>>          /* restart the scheduler */
>>          kthread_unpark(ring->sched.thread);
>>
>> -       up_read(&adev->reset_sem);
>> +       mutex_unlock(&adev->lock_reset);
>>
>>          ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index fe8878761c29..19aa0d7334c7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -1940,7 +1940,7 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
>>                          if (adev->ip_blocks[i].status.hw == true)
>>                                  break;
>>
>> -                       if (amdgpu_in_reset(adev) || adev->in_suspend) {
>> +                       if (adev->in_gpu_reset || adev->in_suspend) {
>>                                  r = adev->ip_blocks[i].version->funcs->resume(adev);
>>                                  if (r) {
>>                                          DRM_ERROR("resume of IP block <%s> failed %d\n",
>> @@ -2117,7 +2117,7 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
>>                          AMDGPU_RESET_MAGIC_NUM))
>>                  return true;
>>
>> -       if (!amdgpu_in_reset(adev))
>> +       if (!adev->in_gpu_reset)
>>                  return false;
>>
>>          /*
>> @@ -3055,8 +3055,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>>          mutex_init(&adev->mn_lock);
>>          mutex_init(&adev->virt.vf_errors.lock);
>>          hash_init(adev->mn_hash);
>> -       init_rwsem(&adev->reset_sem);
>> -       atomic_set(&adev->in_gpu_reset, 0);
>> +       mutex_init(&adev->lock_reset);
>>          mutex_init(&adev->psp.mutex);
>>          mutex_init(&adev->notifier_lock);
>>
>> @@ -4084,11 +4083,8 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
>>          list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
>>                  if (need_full_reset) {
>>                          /* post card */
>> -                       if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context)) {
>> -                               dev_warn(tmp_adev->dev, "asic atom init failed!");
>> -                               r = -EAGAIN;
>> -                               goto out;
>> -                       }
>> +                       if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
>> +                               DRM_WARN("asic atom init failed!");
>>
>>                          if (!r) {
>>                                  dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
>> @@ -4178,18 +4174,16 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
>>          return r;
>>   }
>>
>> -static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, struct amdgpu_hive_info *hive)
>> +static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
>>   {
>> -       if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
>> -               return false;
>> -
>> -       if (hive) {
>> -               down_write_nest_lock(&adev->reset_sem, &hive->hive_lock);
>> -       } else {
>> -               down_write(&adev->reset_sem);
>> -       }
>> +       if (trylock) {
>> +               if (!mutex_trylock(&adev->lock_reset))
>> +                       return false;
>> +       } else
>> +               mutex_lock(&adev->lock_reset);
>>
>>          atomic_inc(&adev->gpu_reset_counter);
>> +       adev->in_gpu_reset = true;
>>          switch (amdgpu_asic_reset_method(adev)) {
>>          case AMD_RESET_METHOD_MODE1:
>>                  adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
>> @@ -4209,8 +4203,8 @@ static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
>>   {
>>          amdgpu_vf_error_trans_all(adev);
>>          adev->mp1_state = PP_MP1_STATE_NONE;
>> -       atomic_set(&adev->in_gpu_reset, 0);
>> -       up_write(&adev->reset_sem);
>> +       adev->in_gpu_reset = false;
>> +       mutex_unlock(&adev->lock_reset);
>>   }
>>
>>   static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
>> @@ -4320,14 +4314,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>>           * We always reset all schedulers for device and all devices for XGMI
>>           * hive so that should take care of them too.
>>           */
>> -       hive = amdgpu_get_xgmi_hive(adev, false);
>> -       if (hive) {
>> -               if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) {
>> -                       DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
>> -                               job ? job->base.id : -1, hive->hive_id);
>> -                       return 0;
>> -               }
>> -               mutex_lock(&hive->hive_lock);
>> +       hive = amdgpu_get_xgmi_hive(adev, true);
>> +       if (hive && !mutex_trylock(&hive->reset_lock)) {
>> +               DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
>> +                         job ? job->base.id : -1, hive->hive_id);
>> +               mutex_unlock(&hive->hive_lock);
>> +               return 0;
>>          }
>>
>>          /*
>> @@ -4349,11 +4341,11 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>>
>>          /* block all schedulers and reset given job's ring */
>>          list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
>> -               if (!amdgpu_device_lock_adev(tmp_adev, hive)) {
>> +               if (!amdgpu_device_lock_adev(tmp_adev, !hive)) {
>>                          DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
>>                                    job ? job->base.id : -1);
>> -                       r = 0;
>> -                       goto skip_recovery;
>> +                       mutex_unlock(&hive->hive_lock);
>> +                       return 0;
>>                  }
>>
>>                  /*
>> @@ -4486,9 +4478,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>>                  amdgpu_device_unlock_adev(tmp_adev);
>>          }
>>
>> -skip_recovery:
>>          if (hive) {
>> -               atomic_set(&hive->in_reset, 0);
>> +               mutex_unlock(&hive->reset_lock);
>>                  mutex_unlock(&hive->hive_lock);
>>          }
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> index ee1e8fff83b2..8c64d8d6cb82 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> @@ -670,8 +670,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>>                  bo_va = NULL;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          switch (args->operation) {
>>          case AMDGPU_VA_OP_MAP:
>>                  va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
>> @@ -701,8 +699,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>>                  amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
>>                                          args->operation);
>>
>> -       up_read(&adev->reset_sem);
>> -
>>   error_backoff:
>>          ttm_eu_backoff_reservation(&ticket, &list);
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> index 8ccd17d02cc6..a819360a4b6a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> @@ -719,7 +719,7 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
>>           *
>>           * also don't wait anymore for IRQ context
>>           * */
>> -       if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
>> +       if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
>>                  goto failed_kiq_read;
>>
>>          might_sleep();
>> @@ -777,7 +777,7 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
>>           *
>>           * also don't wait anymore for IRQ context
>>           * */
>> -       if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
>> +       if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
>>                  goto failed_kiq_write;
>>
>>          might_sleep();
>> @@ -796,5 +796,5 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
>>          amdgpu_ring_undo(ring);
>>          spin_unlock_irqrestore(&kiq->ring_lock, flags);
>>   failed_kiq_write:
>> -       dev_warn(adev->dev, "failed to write reg:%x\n", reg);
>> +       pr_err("failed to write reg:%x\n", reg);
>>   }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> index 75d37dfb51aa..937029ad5271 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> @@ -220,17 +220,17 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
>>
>>          trace_amdgpu_sched_run_job(job);
>>
>> -       if (down_read_trylock(&ring->adev->reset_sem)) {
>> +       if (job->vram_lost_counter != atomic_read(&ring->adev->vram_lost_counter))
>> +               dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if VRAM lost */
>> +
>> +       if (finished->error < 0) {
>> +               DRM_INFO("Skip scheduling IBs!\n");
>> +       } else {
>>                  r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,
>> -                                       &fence);
>> -               up_read(&ring->adev->reset_sem);
>> +                                      &fence);
>>                  if (r)
>>                          DRM_ERROR("Error scheduling IBs (%d)\n", r);
>> -       } else {
>> -               dma_fence_set_error(finished, -ECANCELED);
>> -               DRM_INFO("Skip scheduling IBs!\n");
>>          }
>> -
>>          /* if gpu reset, hw fence will be replaced here */
>>          dma_fence_put(job->fence);
>>          job->fence = dma_fence_get(fence);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> index f8de949d2510..b4a9e0478f25 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> @@ -1087,8 +1087,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
>>          if (!fpriv)
>>                  return;
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          pm_runtime_get_sync(dev->dev);
>>
>>          if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_UVD) != NULL)
>> @@ -1127,8 +1125,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
>>
>>          pm_runtime_mark_last_busy(dev->dev);
>>          pm_runtime_put_autosuspend(dev->dev);
>> -
>> -       up_read(&adev->reset_sem);
>>   }
>>
>>   /*
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
>> index 1705e328c6fc..65ad174bb976 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
>> @@ -163,7 +163,7 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev,
>>          enum amd_pm_state_type pm;
>>          int ret;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          ret = pm_runtime_get_sync(ddev->dev);
>> @@ -172,8 +172,6 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev)) {
>>                  if (adev->smu.ppt_funcs->get_current_power_state)
>>                          pm = smu_get_current_power_state(&adev->smu);
>> @@ -185,8 +183,6 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev,
>>                  pm = adev->pm.dpm.user_state;
>>          }
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(ddev->dev);
>>          pm_runtime_put_autosuspend(ddev->dev);
>>
>> @@ -205,7 +201,7 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev,
>>          enum amd_pm_state_type  state;
>>          int ret;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          if (strncmp("battery", buf, strlen("battery")) == 0)
>> @@ -223,8 +219,6 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev)) {
>>                  mutex_lock(&adev->pm.mutex);
>>                  adev->pm.dpm.user_state = state;
>> @@ -238,9 +232,6 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev,
>>
>>                  amdgpu_pm_compute_clocks(adev);
>>          }
>> -
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(ddev->dev);
>>          pm_runtime_put_autosuspend(ddev->dev);
>>
>> @@ -316,7 +307,7 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev,
>>          enum amd_dpm_forced_level level = 0xff;
>>          int ret;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          ret = pm_runtime_get_sync(ddev->dev);
>> @@ -325,8 +316,6 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev))
>>                  level = smu_get_performance_level(&adev->smu);
>>          else if (adev->powerplay.pp_funcs->get_performance_level)
>> @@ -334,8 +323,6 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev,
>>          else
>>                  level = adev->pm.dpm.forced_level;
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(ddev->dev);
>>          pm_runtime_put_autosuspend(ddev->dev);
>>
>> @@ -362,7 +349,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
>>          enum amd_dpm_forced_level current_level = 0xff;
>>          int ret = 0;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          if (strncmp("low", buf, strlen("low")) == 0) {
>> @@ -393,8 +380,6 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev))
>>                  current_level = smu_get_performance_level(&adev->smu);
>>          else if (adev->powerplay.pp_funcs->get_performance_level)
>> @@ -403,8 +388,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
>>          if (current_level == level) {
>>                  pm_runtime_mark_last_busy(ddev->dev);
>>                  pm_runtime_put_autosuspend(ddev->dev);
>> -               ret = count;
>> -               goto pro_end;
>> +               return count;
>>          }
>>
>>          if (adev->asic_type == CHIP_RAVEN) {
>> @@ -425,8 +409,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
>>                  pr_err("Currently not in any profile mode!\n");
>>                  pm_runtime_mark_last_busy(ddev->dev);
>>                  pm_runtime_put_autosuspend(ddev->dev);
>> -               ret = -EINVAL;
>> -               goto pro_end;
>> +               return -EINVAL;
>>          }
>>
>>          if (is_support_sw_smu(adev)) {
>> @@ -434,8 +417,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
>>                  if (ret) {
>>                          pm_runtime_mark_last_busy(ddev->dev);
>>                          pm_runtime_put_autosuspend(ddev->dev);
>> -                       ret = -EINVAL;
>> -                       goto pro_end;
>> +                       return -EINVAL;
>>                  }
>>          } else if (adev->powerplay.pp_funcs->force_performance_level) {
>>                  mutex_lock(&adev->pm.mutex);
>> @@ -443,16 +425,14 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
>>                          mutex_unlock(&adev->pm.mutex);
>>                          pm_runtime_mark_last_busy(ddev->dev);
>>                          pm_runtime_put_autosuspend(ddev->dev);
>> -                       ret = -EINVAL;
>> -                       goto pro_end;
>> +                       return -EINVAL;
>>                  }
>>                  ret = amdgpu_dpm_force_performance_level(adev, level);
>>                  if (ret) {
>>                          mutex_unlock(&adev->pm.mutex);
>>                          pm_runtime_mark_last_busy(ddev->dev);
>>                          pm_runtime_put_autosuspend(ddev->dev);
>> -                       ret = -EINVAL;
>> -                       goto pro_end;
>> +                       return -EINVAL;
>>                  } else {
>>                          adev->pm.dpm.forced_level = level;
>>                  }
>> @@ -461,9 +441,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
>>          pm_runtime_mark_last_busy(ddev->dev);
>>          pm_runtime_put_autosuspend(ddev->dev);
>>
>> -pro_end:
>> -       up_read(&adev->reset_sem);
>> -       return ret;
>> +       return count;
>>   }
>>
>>   static ssize_t amdgpu_get_pp_num_states(struct device *dev,
>> @@ -475,7 +453,7 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,
>>          struct pp_states_info data;
>>          int i, buf_len, ret;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          ret = pm_runtime_get_sync(ddev->dev);
>> @@ -519,7 +497,7 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
>>          enum amd_pm_state_type pm = 0;
>>          int i = 0, ret = 0;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          ret = pm_runtime_get_sync(ddev->dev);
>> @@ -560,7 +538,7 @@ static ssize_t amdgpu_get_pp_force_state(struct device *dev,
>>          struct drm_device *ddev = dev_get_drvdata(dev);
>>          struct amdgpu_device *adev = ddev->dev_private;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          if (adev->pp_force_state_enabled)
>> @@ -580,7 +558,7 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
>>          unsigned long idx;
>>          int ret;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          if (strlen(buf) == 1)
>> @@ -606,7 +584,6 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
>>                          return ret;
>>                  }
>>
>> -               down_read(&adev->reset_sem);
>>                  /* only set user selected power states */
>>                  if (state != POWER_STATE_TYPE_INTERNAL_BOOT &&
>>                      state != POWER_STATE_TYPE_DEFAULT) {
>> @@ -614,8 +591,6 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
>>                                          AMD_PP_TASK_ENABLE_USER_STATE, &state);
>>                          adev->pp_force_state_enabled = true;
>>                  }
>> -               up_read(&adev->reset_sem);
>> -
>>                  pm_runtime_mark_last_busy(ddev->dev);
>>                  pm_runtime_put_autosuspend(ddev->dev);
>>          }
>> @@ -643,7 +618,7 @@ static ssize_t amdgpu_get_pp_table(struct device *dev,
>>          char *table = NULL;
>>          int size, ret;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          ret = pm_runtime_get_sync(ddev->dev);
>> @@ -687,7 +662,7 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
>>          struct amdgpu_device *adev = ddev->dev_private;
>>          int ret = 0;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          ret = pm_runtime_get_sync(ddev->dev);
>> @@ -696,21 +671,16 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev)) {
>>                  ret = smu_sys_set_pp_table(&adev->smu, (void *)buf, count);
>>                  if (ret) {
>>                          pm_runtime_mark_last_busy(ddev->dev);
>>                          pm_runtime_put_autosuspend(ddev->dev);
>> -                       up_read(&adev->reset_sem);
>>                          return ret;
>>                  }
>>          } else if (adev->powerplay.pp_funcs->set_pp_table)
>>                  amdgpu_dpm_set_pp_table(adev, buf, count);
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(ddev->dev);
>>          pm_runtime_put_autosuspend(ddev->dev);
>>
>> @@ -845,7 +815,7 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
>>          const char delimiter[3] = {' ', '\n', '\0'};
>>          uint32_t type;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          if (count > 127)
>> @@ -889,10 +859,6 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       ret = count;
>> -
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev)) {
>>                  ret = smu_od_edit_dpm_table(&adev->smu, type,
>>                                              parameter, parameter_size);
>> @@ -900,8 +866,7 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
>>                  if (ret) {
>>                          pm_runtime_mark_last_busy(ddev->dev);
>>                          pm_runtime_put_autosuspend(ddev->dev);
>> -                       ret = -EINVAL;
>> -                       goto pro_end;
>> +                       return -EINVAL;
>>                  }
>>          } else {
>>                  if (adev->powerplay.pp_funcs->odn_edit_dpm_table) {
>> @@ -910,8 +875,7 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
>>                          if (ret) {
>>                                  pm_runtime_mark_last_busy(ddev->dev);
>>                                  pm_runtime_put_autosuspend(ddev->dev);
>> -                               ret = -EINVAL;
>> -                               goto pro_end;
>> +                               return -EINVAL;
>>                          }
>>                  }
>>
>> @@ -922,22 +886,18 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
>>                                                  NULL);
>>                                  pm_runtime_mark_last_busy(ddev->dev);
>>                                  pm_runtime_put_autosuspend(ddev->dev);
>> -                               ret = count;
>> -                               goto pro_end;
>> +                               return count;
>>                          } else {
>>                                  pm_runtime_mark_last_busy(ddev->dev);
>>                                  pm_runtime_put_autosuspend(ddev->dev);
>> -                               ret = -EINVAL;
>> -                               goto pro_end;
>> +                               return -EINVAL;
>>                          }
>>                  }
>>          }
>>          pm_runtime_mark_last_busy(ddev->dev);
>>          pm_runtime_put_autosuspend(ddev->dev);
>>
>> -pro_end:
>> -       up_read(&adev->reset_sem);
>> -       return ret;
>> +       return count;
>>   }
>>
>>   static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
>> @@ -949,7 +909,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
>>          ssize_t size;
>>          int ret;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          ret = pm_runtime_get_sync(ddev->dev);
>> @@ -1003,7 +963,7 @@ static ssize_t amdgpu_set_pp_features(struct device *dev,
>>          uint64_t featuremask;
>>          int ret;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          ret = kstrtou64(buf, 0, &featuremask);
>> @@ -1018,13 +978,11 @@ static ssize_t amdgpu_set_pp_features(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>>          if (is_support_sw_smu(adev)) {
>>                  ret = smu_sys_set_pp_feature_mask(&adev->smu, featuremask);
>>                  if (ret) {
>>                          pm_runtime_mark_last_busy(ddev->dev);
>>                          pm_runtime_put_autosuspend(ddev->dev);
>> -                       up_read(&adev->reset_sem);
>>                          return -EINVAL;
>>                  }
>>          } else if (adev->powerplay.pp_funcs->set_ppfeature_status) {
>> @@ -1032,12 +990,9 @@ static ssize_t amdgpu_set_pp_features(struct device *dev,
>>                  if (ret) {
>>                          pm_runtime_mark_last_busy(ddev->dev);
>>                          pm_runtime_put_autosuspend(ddev->dev);
>> -                       up_read(&adev->reset_sem);
>>                          return -EINVAL;
>>                  }
>>          }
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(ddev->dev);
>>          pm_runtime_put_autosuspend(ddev->dev);
>>
>> @@ -1053,7 +1008,7 @@ static ssize_t amdgpu_get_pp_features(struct device *dev,
>>          ssize_t size;
>>          int ret;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          ret = pm_runtime_get_sync(ddev->dev);
>> @@ -1062,8 +1017,6 @@ static ssize_t amdgpu_get_pp_features(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev))
>>                  size = smu_sys_get_pp_feature_mask(&adev->smu, buf);
>>          else if (adev->powerplay.pp_funcs->get_ppfeature_status)
>> @@ -1071,8 +1024,6 @@ static ssize_t amdgpu_get_pp_features(struct device *dev,
>>          else
>>                  size = snprintf(buf, PAGE_SIZE, "\n");
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(ddev->dev);
>>          pm_runtime_put_autosuspend(ddev->dev);
>>
>> @@ -1118,7 +1069,7 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
>>          ssize_t size;
>>          int ret;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          ret = pm_runtime_get_sync(ddev->dev);
>> @@ -1127,8 +1078,6 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev))
>>                  size = smu_print_clk_levels(&adev->smu, SMU_SCLK, buf);
>>          else if (adev->powerplay.pp_funcs->print_clock_levels)
>> @@ -1136,8 +1085,6 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
>>          else
>>                  size = snprintf(buf, PAGE_SIZE, "\n");
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(ddev->dev);
>>          pm_runtime_put_autosuspend(ddev->dev);
>>
>> @@ -1190,7 +1137,7 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
>>          int ret;
>>          uint32_t mask = 0;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          ret = amdgpu_read_mask(buf, count, &mask);
>> @@ -1203,15 +1150,11 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev))
>>                  ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask);
>>          else if (adev->powerplay.pp_funcs->force_clock_level)
>>                  ret = amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(ddev->dev);
>>          pm_runtime_put_autosuspend(ddev->dev);
>>
>> @@ -1230,7 +1173,7 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,
>>          ssize_t size;
>>          int ret;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          ret = pm_runtime_get_sync(ddev->dev);
>> @@ -1239,8 +1182,6 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev))
>>                  size = smu_print_clk_levels(&adev->smu, SMU_MCLK, buf);
>>          else if (adev->powerplay.pp_funcs->print_clock_levels)
>> @@ -1248,8 +1189,6 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,
>>          else
>>                  size = snprintf(buf, PAGE_SIZE, "\n");
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(ddev->dev);
>>          pm_runtime_put_autosuspend(ddev->dev);
>>
>> @@ -1266,7 +1205,7 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
>>          uint32_t mask = 0;
>>          int ret;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          ret = amdgpu_read_mask(buf, count, &mask);
>> @@ -1279,15 +1218,11 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev))
>>                  ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask);
>>          else if (adev->powerplay.pp_funcs->force_clock_level)
>>                  ret = amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(ddev->dev);
>>          pm_runtime_put_autosuspend(ddev->dev);
>>
>> @@ -1306,7 +1241,7 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev,
>>          ssize_t size;
>>          int ret;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          ret = pm_runtime_get_sync(ddev->dev);
>> @@ -1315,8 +1250,6 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev))
>>                  size = smu_print_clk_levels(&adev->smu, SMU_SOCCLK, buf);
>>          else if (adev->powerplay.pp_funcs->print_clock_levels)
>> @@ -1324,8 +1257,6 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev,
>>          else
>>                  size = snprintf(buf, PAGE_SIZE, "\n");
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(ddev->dev);
>>          pm_runtime_put_autosuspend(ddev->dev);
>>
>> @@ -1342,7 +1273,7 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev,
>>          int ret;
>>          uint32_t mask = 0;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          ret = amdgpu_read_mask(buf, count, &mask);
>> @@ -1355,8 +1286,6 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev))
>>                  ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask);
>>          else if (adev->powerplay.pp_funcs->force_clock_level)
>> @@ -1364,8 +1293,6 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev,
>>          else
>>                  ret = 0;
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(ddev->dev);
>>          pm_runtime_put_autosuspend(ddev->dev);
>>
>> @@ -1384,7 +1311,7 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev,
>>          ssize_t size;
>>          int ret;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          ret = pm_runtime_get_sync(ddev->dev);
>> @@ -1393,8 +1320,6 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev))
>>                  size = smu_print_clk_levels(&adev->smu, SMU_FCLK, buf);
>>          else if (adev->powerplay.pp_funcs->print_clock_levels)
>> @@ -1402,8 +1327,6 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev,
>>          else
>>                  size = snprintf(buf, PAGE_SIZE, "\n");
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(ddev->dev);
>>          pm_runtime_put_autosuspend(ddev->dev);
>>
>> @@ -1420,7 +1343,7 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev,
>>          int ret;
>>          uint32_t mask = 0;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          ret = amdgpu_read_mask(buf, count, &mask);
>> @@ -1433,8 +1356,6 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev))
>>                  ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask);
>>          else if (adev->powerplay.pp_funcs->force_clock_level)
>> @@ -1442,8 +1363,6 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev,
>>          else
>>                  ret = 0;
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(ddev->dev);
>>          pm_runtime_put_autosuspend(ddev->dev);
>>
>> @@ -1462,7 +1381,7 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev,
>>          ssize_t size;
>>          int ret;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          ret = pm_runtime_get_sync(ddev->dev);
>> @@ -1471,8 +1390,6 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev))
>>                  size = smu_print_clk_levels(&adev->smu, SMU_DCEFCLK, buf);
>>          else if (adev->powerplay.pp_funcs->print_clock_levels)
>> @@ -1480,8 +1397,6 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev,
>>          else
>>                  size = snprintf(buf, PAGE_SIZE, "\n");
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(ddev->dev);
>>          pm_runtime_put_autosuspend(ddev->dev);
>>
>> @@ -1498,7 +1413,7 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev,
>>          int ret;
>>          uint32_t mask = 0;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          ret = amdgpu_read_mask(buf, count, &mask);
>> @@ -1511,8 +1426,6 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev))
>>                  ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask);
>>          else if (adev->powerplay.pp_funcs->force_clock_level)
>> @@ -1520,8 +1433,6 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev,
>>          else
>>                  ret = 0;
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(ddev->dev);
>>          pm_runtime_put_autosuspend(ddev->dev);
>>
>> @@ -1540,7 +1451,7 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
>>          ssize_t size;
>>          int ret;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          ret = pm_runtime_get_sync(ddev->dev);
>> @@ -1549,8 +1460,6 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev))
>>                  size = smu_print_clk_levels(&adev->smu, SMU_PCIE, buf);
>>          else if (adev->powerplay.pp_funcs->print_clock_levels)
>> @@ -1558,8 +1467,6 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
>>          else
>>                  size = snprintf(buf, PAGE_SIZE, "\n");
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(ddev->dev);
>>          pm_runtime_put_autosuspend(ddev->dev);
>>
>> @@ -1576,7 +1483,7 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
>>          int ret;
>>          uint32_t mask = 0;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          ret = amdgpu_read_mask(buf, count, &mask);
>> @@ -1589,8 +1496,6 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev))
>>                  ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask);
>>          else if (adev->powerplay.pp_funcs->force_clock_level)
>> @@ -1598,8 +1503,6 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
>>          else
>>                  ret = 0;
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(ddev->dev);
>>          pm_runtime_put_autosuspend(ddev->dev);
>>
>> @@ -1618,7 +1521,7 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
>>          uint32_t value = 0;
>>          int ret;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          ret = pm_runtime_get_sync(ddev->dev);
>> @@ -1627,15 +1530,11 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev))
>>                  value = smu_get_od_percentage(&(adev->smu), SMU_OD_SCLK);
>>          else if (adev->powerplay.pp_funcs->get_sclk_od)
>>                  value = amdgpu_dpm_get_sclk_od(adev);
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(ddev->dev);
>>          pm_runtime_put_autosuspend(ddev->dev);
>>
>> @@ -1652,7 +1551,7 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
>>          int ret;
>>          long int value;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          ret = kstrtol(buf, 0, &value);
>> @@ -1666,8 +1565,6 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev)) {
>>                  value = smu_set_od_percentage(&(adev->smu), SMU_OD_SCLK, (uint32_t)value);
>>          } else {
>> @@ -1682,8 +1579,6 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
>>                  }
>>          }
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(ddev->dev);
>>          pm_runtime_put_autosuspend(ddev->dev);
>>
>> @@ -1699,7 +1594,7 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
>>          uint32_t value = 0;
>>          int ret;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          ret = pm_runtime_get_sync(ddev->dev);
>> @@ -1708,15 +1603,11 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev))
>>                  value = smu_get_od_percentage(&(adev->smu), SMU_OD_MCLK);
>>          else if (adev->powerplay.pp_funcs->get_mclk_od)
>>                  value = amdgpu_dpm_get_mclk_od(adev);
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(ddev->dev);
>>          pm_runtime_put_autosuspend(ddev->dev);
>>
>> @@ -1733,7 +1624,7 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
>>          int ret;
>>          long int value;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          ret = kstrtol(buf, 0, &value);
>> @@ -1747,8 +1638,6 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev)) {
>>                  value = smu_set_od_percentage(&(adev->smu), SMU_OD_MCLK, (uint32_t)value);
>>          } else {
>> @@ -1763,8 +1652,6 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
>>                  }
>>          }
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(ddev->dev);
>>          pm_runtime_put_autosuspend(ddev->dev);
>>
>> @@ -1800,7 +1687,7 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
>>          ssize_t size;
>>          int ret;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          ret = pm_runtime_get_sync(ddev->dev);
>> @@ -1809,8 +1696,6 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev))
>>                  size = smu_get_power_profile_mode(&adev->smu, buf);
>>          else if (adev->powerplay.pp_funcs->get_power_profile_mode)
>> @@ -1818,8 +1703,6 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
>>          else
>>                  size = snprintf(buf, PAGE_SIZE, "\n");
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(ddev->dev);
>>          pm_runtime_put_autosuspend(ddev->dev);
>>
>> @@ -1844,7 +1727,7 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
>>          long int profile_mode = 0;
>>          const char delimiter[3] = {' ', '\n', '\0'};
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          tmp[0] = *(buf);
>> @@ -1878,15 +1761,11 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev))
>>                  ret = smu_set_power_profile_mode(&adev->smu, parameter, parameter_size, true);
>>          else if (adev->powerplay.pp_funcs->set_power_profile_mode)
>>                  ret = amdgpu_dpm_set_power_profile_mode(adev, parameter, parameter_size);
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(ddev->dev);
>>          pm_runtime_put_autosuspend(ddev->dev);
>>
>> @@ -1912,7 +1791,7 @@ static ssize_t amdgpu_get_gpu_busy_percent(struct device *dev,
>>          struct amdgpu_device *adev = ddev->dev_private;
>>          int r, value, size = sizeof(value);
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          r = pm_runtime_get_sync(ddev->dev);
>> @@ -1921,11 +1800,9 @@ static ssize_t amdgpu_get_gpu_busy_percent(struct device *dev,
>>                  return r;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>>          /* read the IP busy sensor */
>>          r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD,
>>                                     (void *)&value, &size);
>> -       up_read(&adev->reset_sem);
>>
>>          pm_runtime_mark_last_busy(ddev->dev);
>>          pm_runtime_put_autosuspend(ddev->dev);
>> @@ -1952,7 +1829,7 @@ static ssize_t amdgpu_get_mem_busy_percent(struct device *dev,
>>          struct amdgpu_device *adev = ddev->dev_private;
>>          int r, value, size = sizeof(value);
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          r = pm_runtime_get_sync(ddev->dev);
>> @@ -1961,14 +1838,10 @@ static ssize_t amdgpu_get_mem_busy_percent(struct device *dev,
>>                  return r;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          /* read the IP busy sensor */
>>          r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD,
>>                                     (void *)&value, &size);
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(ddev->dev);
>>          pm_runtime_put_autosuspend(ddev->dev);
>>
>> @@ -1999,7 +1872,7 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
>>          uint64_t count0 = 0, count1 = 0;
>>          int ret;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          if (adev->flags & AMD_IS_APU)
>> @@ -2014,12 +1887,8 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          amdgpu_asic_get_pcie_usage(adev, &count0, &count1);
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(ddev->dev);
>>          pm_runtime_put_autosuspend(ddev->dev);
>>
>> @@ -2044,7 +1913,7 @@ static ssize_t amdgpu_get_unique_id(struct device *dev,
>>          struct drm_device *ddev = dev_get_drvdata(dev);
>>          struct amdgpu_device *adev = ddev->dev_private;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          if (adev->unique_id)
>> @@ -2142,7 +2011,7 @@ static ssize_t amdgpu_get_gpu_metrics(struct device *dev,
>>          ssize_t size = 0;
>>          int ret;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          ret = pm_runtime_get_sync(ddev->dev);
>> @@ -2151,12 +2020,10 @@ static ssize_t amdgpu_get_gpu_metrics(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>>          if (is_support_sw_smu(adev))
>>                  size = smu_sys_get_gpu_metrics(&adev->smu, &gpu_metrics);
>>          else if (adev->powerplay.pp_funcs->get_gpu_metrics)
>>                  size = amdgpu_dpm_get_gpu_metrics(adev, &gpu_metrics);
>> -       up_read(&adev->reset_sem);
>>
>>          if (size <= 0)
>>                  goto out;
>> @@ -2368,7 +2235,7 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
>>          int channel = to_sensor_dev_attr(attr)->index;
>>          int r, temp = 0, size = sizeof(temp);
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          if (channel >= PP_TEMP_MAX)
>> @@ -2380,8 +2247,6 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
>>                  return r;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          switch (channel) {
>>          case PP_TEMP_JUNCTION:
>>                  /* get current junction temperature */
>> @@ -2403,8 +2268,6 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
>>                  break;
>>          }
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(adev->ddev->dev);
>>          pm_runtime_put_autosuspend(adev->ddev->dev);
>>
>> @@ -2508,7 +2371,7 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
>>          u32 pwm_mode = 0;
>>          int ret;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          ret = pm_runtime_get_sync(adev->ddev->dev);
>> @@ -2517,23 +2380,18 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev)) {
>>                  pwm_mode = smu_get_fan_control_mode(&adev->smu);
>>          } else {
>>                  if (!adev->powerplay.pp_funcs->get_fan_control_mode) {
>>                          pm_runtime_mark_last_busy(adev->ddev->dev);
>>                          pm_runtime_put_autosuspend(adev->ddev->dev);
>> -                       up_read(&adev->reset_sem);
>>                          return -EINVAL;
>>                  }
>>
>>                  pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
>>          }
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(adev->ddev->dev);
>>          pm_runtime_put_autosuspend(adev->ddev->dev);
>>
>> @@ -2549,7 +2407,7 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
>>          int err, ret;
>>          int value;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          err = kstrtoint(buf, 10, &value);
>> @@ -2562,23 +2420,18 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev)) {
>>                  smu_set_fan_control_mode(&adev->smu, value);
>>          } else {
>>                  if (!adev->powerplay.pp_funcs->set_fan_control_mode) {
>>                          pm_runtime_mark_last_busy(adev->ddev->dev);
>>                          pm_runtime_put_autosuspend(adev->ddev->dev);
>> -                       up_read(&adev->reset_sem);
>>                          return -EINVAL;
>>                  }
>>
>>                  amdgpu_dpm_set_fan_control_mode(adev, value);
>>          }
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(adev->ddev->dev);
>>          pm_runtime_put_autosuspend(adev->ddev->dev);
>>
>> @@ -2608,7 +2461,7 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
>>          u32 value;
>>          u32 pwm_mode;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          err = pm_runtime_get_sync(adev->ddev->dev);
>> @@ -2617,15 +2470,11 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
>>                  return err;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev))
>>                  pwm_mode = smu_get_fan_control_mode(&adev->smu);
>>          else
>>                  pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          if (pwm_mode != AMD_FAN_CTRL_MANUAL) {
>>                  pr_info("manual fan speed control should be enabled first\n");
>>                  pm_runtime_mark_last_busy(adev->ddev->dev);
>> @@ -2666,7 +2515,7 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
>>          int err;
>>          u32 speed = 0;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          err = pm_runtime_get_sync(adev->ddev->dev);
>> @@ -2675,8 +2524,6 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
>>                  return err;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev))
>>                  err = smu_get_fan_speed_percent(&adev->smu, &speed);
>>          else if (adev->powerplay.pp_funcs->get_fan_speed_percent)
>> @@ -2684,8 +2531,6 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
>>          else
>>                  err = -EINVAL;
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(adev->ddev->dev);
>>          pm_runtime_put_autosuspend(adev->ddev->dev);
>>
>> @@ -2705,7 +2550,7 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
>>          int err;
>>          u32 speed = 0;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          err = pm_runtime_get_sync(adev->ddev->dev);
>> @@ -2714,8 +2559,6 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
>>                  return err;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev))
>>                  err = smu_get_fan_speed_rpm(&adev->smu, &speed);
>>          else if (adev->powerplay.pp_funcs->get_fan_speed_rpm)
>> @@ -2723,8 +2566,6 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
>>          else
>>                  err = -EINVAL;
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(adev->ddev->dev);
>>          pm_runtime_put_autosuspend(adev->ddev->dev);
>>
>> @@ -2743,7 +2584,7 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev,
>>          u32 size = sizeof(min_rpm);
>>          int r;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          r = pm_runtime_get_sync(adev->ddev->dev);
>> @@ -2752,13 +2593,9 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev,
>>                  return r;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MIN_FAN_RPM,
>>                                     (void *)&min_rpm, &size);
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(adev->ddev->dev);
>>          pm_runtime_put_autosuspend(adev->ddev->dev);
>>
>> @@ -2777,7 +2614,7 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev,
>>          u32 size = sizeof(max_rpm);
>>          int r;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          r = pm_runtime_get_sync(adev->ddev->dev);
>> @@ -2786,13 +2623,9 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev,
>>                  return r;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MAX_FAN_RPM,
>>                                     (void *)&max_rpm, &size);
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(adev->ddev->dev);
>>          pm_runtime_put_autosuspend(adev->ddev->dev);
>>
>> @@ -2810,7 +2643,7 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
>>          int err;
>>          u32 rpm = 0;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          err = pm_runtime_get_sync(adev->ddev->dev);
>> @@ -2819,8 +2652,6 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
>>                  return err;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev))
>>                  err = smu_get_fan_speed_rpm(&adev->smu, &rpm);
>>          else if (adev->powerplay.pp_funcs->get_fan_speed_rpm)
>> @@ -2828,8 +2659,6 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
>>          else
>>                  err = -EINVAL;
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(adev->ddev->dev);
>>          pm_runtime_put_autosuspend(adev->ddev->dev);
>>
>> @@ -2848,7 +2677,7 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
>>          u32 value;
>>          u32 pwm_mode;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          err = pm_runtime_get_sync(adev->ddev->dev);
>> @@ -2857,15 +2686,11 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
>>                  return err;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev))
>>                  pwm_mode = smu_get_fan_control_mode(&adev->smu);
>>          else
>>                  pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          if (pwm_mode != AMD_FAN_CTRL_MANUAL) {
>>                  pm_runtime_mark_last_busy(adev->ddev->dev);
>>                  pm_runtime_put_autosuspend(adev->ddev->dev);
>> @@ -2879,8 +2704,6 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
>>                  return err;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev))
>>                  err = smu_set_fan_speed_rpm(&adev->smu, value);
>>          else if (adev->powerplay.pp_funcs->set_fan_speed_rpm)
>> @@ -2888,8 +2711,6 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
>>          else
>>                  err = -EINVAL;
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(adev->ddev->dev);
>>          pm_runtime_put_autosuspend(adev->ddev->dev);
>>
>> @@ -2907,7 +2728,7 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev,
>>          u32 pwm_mode = 0;
>>          int ret;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          ret = pm_runtime_get_sync(adev->ddev->dev);
>> @@ -2916,23 +2737,18 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev,
>>                  return ret;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev)) {
>>                  pwm_mode = smu_get_fan_control_mode(&adev->smu);
>>          } else {
>>                  if (!adev->powerplay.pp_funcs->get_fan_control_mode) {
>>                          pm_runtime_mark_last_busy(adev->ddev->dev);
>>                          pm_runtime_put_autosuspend(adev->ddev->dev);
>> -                       up_read(&adev->reset_sem);
>>                          return -EINVAL;
>>                  }
>>
>>                  pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
>>          }
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(adev->ddev->dev);
>>          pm_runtime_put_autosuspend(adev->ddev->dev);
>>
>> @@ -2949,7 +2765,7 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
>>          int value;
>>          u32 pwm_mode;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          err = kstrtoint(buf, 10, &value);
>> @@ -2969,22 +2785,17 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
>>                  return err;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev)) {
>>                  smu_set_fan_control_mode(&adev->smu, pwm_mode);
>>          } else {
>>                  if (!adev->powerplay.pp_funcs->set_fan_control_mode) {
>>                          pm_runtime_mark_last_busy(adev->ddev->dev);
>>                          pm_runtime_put_autosuspend(adev->ddev->dev);
>> -                       up_read(&adev->reset_sem);
>>                          return -EINVAL;
>>                  }
>>                  amdgpu_dpm_set_fan_control_mode(adev, pwm_mode);
>>          }
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(adev->ddev->dev);
>>          pm_runtime_put_autosuspend(adev->ddev->dev);
>>
>> @@ -2999,7 +2810,7 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev,
>>          u32 vddgfx;
>>          int r, size = sizeof(vddgfx);
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          r = pm_runtime_get_sync(adev->ddev->dev);
>> @@ -3008,11 +2819,9 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev,
>>                  return r;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>>          /* get the voltage */
>>          r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX,
>>                                     (void *)&vddgfx, &size);
>> -       up_read(&adev->reset_sem);
>>
>>          pm_runtime_mark_last_busy(adev->ddev->dev);
>>          pm_runtime_put_autosuspend(adev->ddev->dev);
>> @@ -3038,7 +2847,7 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev,
>>          u32 vddnb;
>>          int r, size = sizeof(vddnb);
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          /* only APUs have vddnb */
>> @@ -3051,11 +2860,9 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev,
>>                  return r;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>>          /* get the voltage */
>>          r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB,
>>                                     (void *)&vddnb, &size);
>> -       up_read(&adev->reset_sem);
>>
>>          pm_runtime_mark_last_busy(adev->ddev->dev);
>>          pm_runtime_put_autosuspend(adev->ddev->dev);
>> @@ -3082,7 +2889,7 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
>>          int r, size = sizeof(u32);
>>          unsigned uw;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          r = pm_runtime_get_sync(adev->ddev->dev);
>> @@ -3091,11 +2898,9 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
>>                  return r;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>>          /* get the voltage */
>>          r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER,
>>                                     (void *)&query, &size);
>> -       up_read(&adev->reset_sem);
>>
>>          pm_runtime_mark_last_busy(adev->ddev->dev);
>>          pm_runtime_put_autosuspend(adev->ddev->dev);
>> @@ -3125,7 +2930,7 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
>>          ssize_t size;
>>          int r;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          r = pm_runtime_get_sync(adev->ddev->dev);
>> @@ -3134,8 +2939,6 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
>>                  return r;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev)) {
>>                  smu_get_power_limit(&adev->smu, &limit, true);
>>                  size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
>> @@ -3146,8 +2949,6 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
>>                  size = snprintf(buf, PAGE_SIZE, "\n");
>>          }
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(adev->ddev->dev);
>>          pm_runtime_put_autosuspend(adev->ddev->dev);
>>
>> @@ -3163,7 +2964,7 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
>>          ssize_t size;
>>          int r;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          r = pm_runtime_get_sync(adev->ddev->dev);
>> @@ -3172,8 +2973,6 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
>>                  return r;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev)) {
>>                  smu_get_power_limit(&adev->smu, &limit, false);
>>                  size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
>> @@ -3184,8 +2983,6 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
>>                  size = snprintf(buf, PAGE_SIZE, "\n");
>>          }
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(adev->ddev->dev);
>>          pm_runtime_put_autosuspend(adev->ddev->dev);
>>
>> @@ -3202,7 +2999,7 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
>>          int err;
>>          u32 value;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          if (amdgpu_sriov_vf(adev))
>> @@ -3221,8 +3018,6 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
>>                  return err;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>> -
>>          if (is_support_sw_smu(adev))
>>                  err = smu_set_power_limit(&adev->smu, value);
>>          else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit)
>> @@ -3230,8 +3025,6 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
>>          else
>>                  err = -EINVAL;
>>
>> -       up_read(&adev->reset_sem);
>> -
>>          pm_runtime_mark_last_busy(adev->ddev->dev);
>>          pm_runtime_put_autosuspend(adev->ddev->dev);
>>
>> @@ -3249,7 +3042,7 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev,
>>          uint32_t sclk;
>>          int r, size = sizeof(sclk);
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          r = pm_runtime_get_sync(adev->ddev->dev);
>> @@ -3258,11 +3051,9 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev,
>>                  return r;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>>          /* get the sclk */
>>          r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK,
>>                                     (void *)&sclk, &size);
>> -       up_read(&adev->reset_sem);
>>
>>          pm_runtime_mark_last_busy(adev->ddev->dev);
>>          pm_runtime_put_autosuspend(adev->ddev->dev);
>> @@ -3288,7 +3079,7 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev,
>>          uint32_t mclk;
>>          int r, size = sizeof(mclk);
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          r = pm_runtime_get_sync(adev->ddev->dev);
>> @@ -3297,11 +3088,9 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev,
>>                  return r;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>>          /* get the sclk */
>>          r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK,
>>                                     (void *)&mclk, &size);
>> -       up_read(&adev->reset_sem);
>>
>>          pm_runtime_mark_last_busy(adev->ddev->dev);
>>          pm_runtime_put_autosuspend(adev->ddev->dev);
>> @@ -4188,7 +3977,7 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data)
>>          u32 flags = 0;
>>          int r;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EPERM;
>>
>>          r = pm_runtime_get_sync(dev->dev);
>> @@ -4204,7 +3993,6 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data)
>>                  return 0;
>>          }
>>
>> -       down_read(&adev->reset_sem);
>>          if (!is_support_sw_smu(adev) &&
>>              adev->powerplay.pp_funcs->debugfs_print_current_performance_level) {
>>                  mutex_lock(&adev->pm.mutex);
>> @@ -4217,13 +4005,10 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data)
>>          } else {
>>                  r = amdgpu_debugfs_pm_info_pp(m, adev);
>>          }
>> -       up_read(&adev->reset_sem);
>>          if (r)
>>                  goto out;
>>
>> -       down_read(&adev->reset_sem);
>>          amdgpu_device_ip_get_clockgating_state(adev, &flags);
>> -       up_read(&adev->reset_sem);
>>
>>          seq_printf(m, "Clock Gating Flags Mask: 0x%x\n", flags);
>>          amdgpu_parse_cg_state(m, flags);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
>> index 116a89990f39..aa1e77c60c0a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
>> @@ -1869,7 +1869,7 @@ static int psp_load_smu_fw(struct psp_context *psp)
>>                  return 0;
>>
>>
>> -       if (amdgpu_in_reset(adev) && ras && ras->supported) {
>> +       if (adev->in_gpu_reset && ras && ras->supported) {
>>                  ret = amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_UNLOAD);
>>                  if (ret) {
>>                          DRM_WARN("Failed to set MP1 state prepare for reload\n");
>> @@ -1984,7 +1984,7 @@ static int psp_load_fw(struct amdgpu_device *adev)
>>          int ret;
>>          struct psp_context *psp = &adev->psp;
>>
>> -       if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) {
>> +       if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset) {
>>                  psp_ring_stop(psp, PSP_RING_TYPE__KM); /* should not destroy ring, only stop */
>>                  goto skip_memalloc;
>>          }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
>> index cd1403f83dcf..f09082578865 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
>> @@ -2079,7 +2079,7 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev,
>>                          amdgpu_ras_request_reset_on_boot(adev,
>>                                          ras_block->block);
>>                          return 0;
>> -               } else if (adev->in_suspend || amdgpu_in_reset(adev)) {
>> +               } else if (adev->in_suspend || adev->in_gpu_reset) {
>>                          /* in resume phase, if fail to enable ras,
>>                           * clean up all ras fs nodes, and disable ras */
>>                          goto cleanup;
>> @@ -2088,7 +2088,7 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev,
>>          }
>>
>>          /* in resume phase, no need to create ras fs node */
>> -       if (adev->in_suspend || amdgpu_in_reset(adev))
>> +       if (adev->in_suspend || adev->in_gpu_reset)
>>                  return 0;
>>
>>          if (ih_info->cb) {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index 20fa0497aaa4..1e19d130473f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -2103,7 +2103,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
>>          uint64_t size;
>>          int r;
>>
>> -       if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
>> +       if (!adev->mman.initialized || adev->in_gpu_reset ||
>>              adev->mman.buffer_funcs_enabled == enable)
>>                  return;
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
>> index 039245c98ff8..183743c5fb7b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
>> @@ -628,8 +628,7 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
>>          struct amdgpu_firmware_info *ucode = NULL;
>>
>>    /* for baremetal, the ucode is allocated in gtt, so don't need to fill the bo when reset/suspend */
>> -       if (!amdgpu_sriov_vf(adev) &&
>> -               (amdgpu_in_reset(adev) || adev->in_suspend))
>> +       if (!amdgpu_sriov_vf(adev) && (adev->in_gpu_reset || adev->in_suspend))
>>                  return 0;
>>          /*
>>           * if SMU loaded firmware, it needn't add SMC, UVD, and VCE
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>> index 1e211544f2dc..ae720a6dc5a0 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>> @@ -93,7 +93,7 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
>>          amdgpu_ring_undo(ring);
>>          spin_unlock_irqrestore(&kiq->ring_lock, flags);
>>   failed_kiq:
>> -       dev_warn(adev->dev, "failed to write reg %x wait reg %x\n", reg0, reg1);
>> +       pr_err("failed to write reg %x wait reg %x\n", reg0, reg1);
>>   }
>>
>>   /**
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
>> index b2046c3a404d..f826945989c7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
>> @@ -325,9 +325,9 @@ static inline bool is_virtual_machine(void)
>>   #define amdgpu_sriov_is_pp_one_vf(adev) \
>>          ((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF)
>>   #define amdgpu_sriov_is_debug(adev) \
>> -       ((!amdgpu_in_reset(adev)) && adev->virt.tdr_debug)
>> +       ((!adev->in_gpu_reset) && adev->virt.tdr_debug)
>>   #define amdgpu_sriov_is_normal(adev) \
>> -       ((!amdgpu_in_reset(adev)) && (!adev->virt.tdr_debug))
>> +       ((!adev->in_gpu_reset) && (!adev->virt.tdr_debug))
>>
>>   bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
>>   void amdgpu_virt_init_setting(struct amdgpu_device *adev);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
>> index 67a756f4337b..cd6e6eb7d966 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
>> @@ -372,7 +372,7 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lo
>>          tmp->hive_id = adev->gmc.xgmi.hive_id;
>>          INIT_LIST_HEAD(&tmp->device_list);
>>          mutex_init(&tmp->hive_lock);
>> -       atomic_set(&tmp->in_reset, 0);
>> +       mutex_init(&tmp->reset_lock);
>>          task_barrier_init(&tmp->tb);
>>
>>          if (lock)
>> @@ -397,7 +397,6 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
>>                                                  hive->hi_req_gpu : adev;
>>          bool is_hi_req = pstate == AMDGPU_XGMI_PSTATE_MAX_VEGA20;
>>          bool init_low = hive->pstate == AMDGPU_XGMI_PSTATE_UNKNOWN;
>> -       bool locked;
>>
>>          /* fw bug so temporarily disable pstate switching */
>>          return 0;
>> @@ -405,9 +404,7 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
>>          if (!hive || adev->asic_type != CHIP_VEGA20)
>>                  return 0;
>>
>> -       locked = atomic_read(&hive->in_reset) ? false : true;
>> -       if (locked)
>> -               mutex_lock(&hive->hive_lock);
>> +       mutex_lock(&hive->hive_lock);
>>
>>          if (is_hi_req)
>>                  hive->hi_req_count++;
>> @@ -442,8 +439,7 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
>>                                                          adev : NULL;
>>          }
>>   out:
>> -       if (locked)
>> -               mutex_unlock(&hive->hive_lock);
>> +       mutex_unlock(&hive->hive_lock);
>>          return ret;
>>   }
>>
>> @@ -598,6 +594,7 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
>>          if(!(--hive->number_devices)){
>>                  amdgpu_xgmi_sysfs_destroy(adev, hive);
>>                  mutex_destroy(&hive->hive_lock);
>> +               mutex_destroy(&hive->reset_lock);
>>          }
>>
>>          return psp_xgmi_terminate(&adev->psp);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
>> index 61720cd4a1ee..6999eab16a72 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
>> @@ -30,8 +30,7 @@ struct amdgpu_hive_info {
>>          uint64_t                hive_id;
>>          struct list_head        device_list;
>>          int number_devices;
>> -       struct mutex hive_lock;
>> -       atomic_t in_reset;
>> +       struct mutex hive_lock, reset_lock;
>>          struct kobject *kobj;
>>          struct device_attribute dev_attr;
>>          struct amdgpu_device *adev;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c
>> index 8341bd965202..4cfc786699c7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/atom.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/atom.c
>> @@ -755,7 +755,6 @@ static void atom_op_jump(atom_exec_context *ctx, int *ptr, int arg)
>>                                  /* jiffies wrap around we will just wait a little longer */
>>                                  ctx->last_jump_jiffies = jiffies;
>>                          }
>> -                       schedule();
>>                  } else {
>>                          ctx->last_jump = ctx->start + target;
>>                          ctx->last_jump_jiffies = jiffies;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> index de6e6de41867..e87d43537013 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> @@ -6201,7 +6201,7 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
>>          struct v10_gfx_mqd *mqd = ring->mqd_ptr;
>>          int mqd_idx = ring - &adev->gfx.gfx_ring[0];
>>
>> -       if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
>> +       if (!adev->in_gpu_reset && !adev->in_suspend) {
>>                  memset((void *)mqd, 0, sizeof(*mqd));
>>                  mutex_lock(&adev->srbm_mutex);
>>                  nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
>> @@ -6213,7 +6213,7 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
>>                  mutex_unlock(&adev->srbm_mutex);
>>                  if (adev->gfx.me.mqd_backup[mqd_idx])
>>                          memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
>> -       } else if (amdgpu_in_reset(adev)) {
>> +       } else if (adev->in_gpu_reset) {
>>                  /* reset mqd with the backup copy */
>>                  if (adev->gfx.me.mqd_backup[mqd_idx])
>>                          memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
>> @@ -6566,7 +6566,7 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring)
>>
>>          gfx_v10_0_kiq_setting(ring);
>>
>> -       if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
>> +       if (adev->in_gpu_reset) { /* for GPU_RESET case */
>>                  /* reset MQD to a clean status */
>>                  if (adev->gfx.mec.mqd_backup[mqd_idx])
>>                          memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
>> @@ -6602,7 +6602,7 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
>>          struct v10_compute_mqd *mqd = ring->mqd_ptr;
>>          int mqd_idx = ring - &adev->gfx.compute_ring[0];
>>
>> -       if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
>> +       if (!adev->in_gpu_reset && !adev->in_suspend) {
>>                  memset((void *)mqd, 0, sizeof(*mqd));
>>                  mutex_lock(&adev->srbm_mutex);
>>                  nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
>> @@ -6612,7 +6612,7 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
>>
>>                  if (adev->gfx.mec.mqd_backup[mqd_idx])
>>                          memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
>> -       } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
>> +       } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
>>                  /* reset MQD to a clean status */
>>                  if (adev->gfx.mec.mqd_backup[mqd_idx])
>>                          memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> index 7df567a6656d..14fd04b699da 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> @@ -4633,7 +4633,7 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
>>
>>          gfx_v8_0_kiq_setting(ring);
>>
>> -       if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
>> +       if (adev->in_gpu_reset) { /* for GPU_RESET case */
>>                  /* reset MQD to a clean status */
>>                  if (adev->gfx.mec.mqd_backup[mqd_idx])
>>                          memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
>> @@ -4670,7 +4670,7 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
>>          struct vi_mqd *mqd = ring->mqd_ptr;
>>          int mqd_idx = ring - &adev->gfx.compute_ring[0];
>>
>> -       if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
>> +       if (!adev->in_gpu_reset && !adev->in_suspend) {
>>                  memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
>>                  ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
>>                  ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
>> @@ -4682,7 +4682,7 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
>>
>>                  if (adev->gfx.mec.mqd_backup[mqd_idx])
>>                          memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
>> -       } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
>> +       } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
>>                  /* reset MQD to a clean status */
>>                  if (adev->gfx.mec.mqd_backup[mqd_idx])
>>                          memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> index 93c63ff3b35e..2c5bb282cc01 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> @@ -3686,7 +3686,7 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
>>
>>          gfx_v9_0_kiq_setting(ring);
>>
>> -       if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
>> +       if (adev->in_gpu_reset) { /* for GPU_RESET case */
>>                  /* reset MQD to a clean status */
>>                  if (adev->gfx.mec.mqd_backup[mqd_idx])
>>                          memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
>> @@ -3724,7 +3724,7 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
>>          struct v9_mqd *mqd = ring->mqd_ptr;
>>          int mqd_idx = ring - &adev->gfx.compute_ring[0];
>>
>> -       if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
>> +       if (!adev->in_gpu_reset && !adev->in_suspend) {
>>                  memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
>>                  ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
>>                  ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
>> @@ -3736,7 +3736,7 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
>>
>>                  if (adev->gfx.mec.mqd_backup[mqd_idx])
>>                          memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
>> -       } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
>> +       } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
>>                  /* reset MQD to a clean status */
>>                  if (adev->gfx.mec.mqd_backup[mqd_idx])
>>                          memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
>> @@ -3930,7 +3930,7 @@ static int gfx_v9_0_hw_fini(void *handle)
>>          /* Use deinitialize sequence from CAIL when unbinding device from driver,
>>           * otherwise KIQ is hanging when binding back
>>           */
>> -       if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
>> +       if (!adev->in_gpu_reset && !adev->in_suspend) {
>>                  mutex_lock(&adev->srbm_mutex);
>>                  soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
>>                                  adev->gfx.kiq.ring.pipe,
>> @@ -4088,7 +4088,7 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
>>           *
>>           * also don't wait anymore for IRQ context
>>           * */
>> -       if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
>> +       if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
>>                  goto failed_kiq_read;
>>
>>          might_sleep();
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> index 9d3b1245a339..ec8c0af39553 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> @@ -287,7 +287,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>>           */
>>          if (adev->gfx.kiq.ring.sched.ready &&
>>              (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
>> -           !amdgpu_in_reset(adev)) {
>> +           !adev->in_gpu_reset) {
>>
>>                  struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
>>                  const unsigned eng = 17;
>> @@ -312,7 +312,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>>
>>          if (!adev->mman.buffer_funcs_enabled ||
>>              !adev->ib_pool_ready ||
>> -           amdgpu_in_reset(adev) ||
>> +           adev->in_gpu_reset ||
>>              ring->sched.ready == false) {
>>                  gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0);
>>                  mutex_unlock(&adev->mman.gtt_window_lock);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> index 80c146df338a..3ce5c1d2fdf2 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> @@ -434,7 +434,7 @@ static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>>          int vmid;
>>          unsigned int tmp;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EIO;
>>
>>          for (vmid = 1; vmid < 16; vmid++) {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> index 9ab65ca7df77..3e6615f9d39c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> @@ -635,7 +635,7 @@ static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>>          int vmid;
>>          unsigned int tmp;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EIO;
>>
>>          for (vmid = 1; vmid < 16; vmid++) {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index 773ee11b3d17..6a780b674018 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -501,7 +501,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>>           */
>>          if (adev->gfx.kiq.ring.sched.ready &&
>>                          (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
>> -                       !amdgpu_in_reset(adev)) {
>> +                       !adev->in_gpu_reset) {
>>                  uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
>>                  uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
>>
>> @@ -596,7 +596,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>>          struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>>          struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>>
>> -       if (amdgpu_in_reset(adev))
>> +       if (adev->in_gpu_reset)
>>                  return -EIO;
>>
>>          if (ring->sched.ready) {
>> @@ -633,8 +633,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>>                  spin_unlock(&adev->gfx.kiq.ring_lock);
>>                  r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
>>                  if (r < 1) {
>> -                       dev_info(adev->dev,
>> -                               "wait for kiq fence error: %ld\n", r);
>> +                       DRM_ERROR("wait for kiq fence error: %ld.\n", r);
>>                          return -ETIME;
>>                  }
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
>> index fe31cbeccfe9..5fd67e1cc2a0 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
>> @@ -238,16 +238,20 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
>>          struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
>>          struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
>>          int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;
>> +       int locked;
>>
>>          /* block amdgpu_gpu_recover till msg FLR COMPLETE received,
>>           * otherwise the mailbox msg will be ruined/reseted by
>>           * the VF FLR.
>>           *
>> -        * we can unlock the reset_sem to allow "amdgpu_job_timedout"
>> +        * we can unlock the lock_reset to allow "amdgpu_job_timedout"
>>           * to run gpu_recover() after FLR_NOTIFICATION_CMPL received
>>           * which means host side had finished this VF's FLR.
>>           */
>> -       down_read(&adev->reset_sem);
>> +       locked = mutex_trylock(&adev->lock_reset);
>> +       if (locked)
>> +               adev->in_gpu_reset = true;
>> +
>>          do {
>>                  if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
>>                          goto flr_done;
>> @@ -257,7 +261,10 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
>>          } while (timeout > 1);
>>
>>   flr_done:
>> -       up_read(&adev->reset_sem);
>> +       if (locked) {
>> +               adev->in_gpu_reset = false;
>> +               mutex_unlock(&adev->lock_reset);
>> +       }
>>
>>          /* Trigger recovery for world switch failure if no TDR */
>>          if (amdgpu_device_should_recover_gpu(adev)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
>> index 6f55172e8337..ce2bf1fb79ed 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
>> @@ -259,16 +259,20 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
>>          struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
>>          struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
>>          int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT;
>> +       int locked;
>>
>>          /* block amdgpu_gpu_recover till msg FLR COMPLETE received,
>>           * otherwise the mailbox msg will be ruined/reseted by
>>           * the VF FLR.
>>           *
>> -        * we can unlock the reset_sem to allow "amdgpu_job_timedout"
>> +        * we can unlock the lock_reset to allow "amdgpu_job_timedout"
>>           * to run gpu_recover() after FLR_NOTIFICATION_CMPL received
>>           * which means host side had finished this VF's FLR.
>>           */
>> -       down_read(&adev->reset_sem);
>> +       locked = mutex_trylock(&adev->lock_reset);
>> +       if (locked)
>> +               adev->in_gpu_reset = true;
>> +
>>          do {
>>                  if (xgpu_nv_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
>>                          goto flr_done;
>> @@ -278,7 +282,10 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
>>          } while (timeout > 1);
>>
>>   flr_done:
>> -       up_read(&adev->reset_sem);
>> +       if (locked) {
>> +               adev->in_gpu_reset = false;
>> +               mutex_unlock(&adev->lock_reset);
>> +       }
>>
>>          /* Trigger recovery for world switch failure if no TDR */
>>          if (amdgpu_device_should_recover_gpu(adev)
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> index 7ad1537820b5..e0e60b0d0669 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> @@ -304,17 +304,15 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
>>                                  struct qcm_process_device *qpd,
>>                                  struct queue *q)
>>   {
>> -       if (!dqm->is_resetting) {
>> -               /* On GFX v7, CP doesn't flush TC at dequeue */
>> -               if (q->device->device_info->asic_family == CHIP_HAWAII)
>> -                       if (flush_texture_cache_nocpsch(q->device, qpd))
>> -                               pr_err("Failed to flush TC\n");
>> +       /* On GFX v7, CP doesn't flush TC at dequeue */
>> +       if (q->device->device_info->asic_family == CHIP_HAWAII)
>> +               if (flush_texture_cache_nocpsch(q->device, qpd))
>> +                       pr_err("Failed to flush TC\n");
>>
>> -               kfd_flush_tlb(qpd_to_pdd(qpd));
>> +       kfd_flush_tlb(qpd_to_pdd(qpd));
>>
>> -               /* Release the vmid mapping */
>> -               set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
>> -       }
>> +       /* Release the vmid mapping */
>> +       set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
>>          dqm->vmid_pasid[qpd->vmid] = 0;
>>
>>          qpd->vmid = 0;
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> index 71be897d4c2a..013c2b018edc 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> @@ -1551,10 +1551,6 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
>>   void kfd_flush_tlb(struct kfd_process_device *pdd)
>>   {
>>          struct kfd_dev *dev = pdd->dev;
>> -       struct device_queue_manager *dqm = dev->dqm;
>> -
>> -       if (dqm->is_resetting)
>> -               return;
>>
>>          if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
>>                  /* Nothing to flush until a VMID is assigned, which
>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> index ff5f7f7ceec6..c4daa22904da 100644
>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> @@ -1658,7 +1658,7 @@ static int dm_suspend(void *handle)
>>          struct amdgpu_display_manager *dm = &adev->dm;
>>          int ret = 0;
>>
>> -       if (amdgpu_in_reset(adev)) {
>> +       if (adev->in_gpu_reset) {
>>                  mutex_lock(&dm->dc_lock);
>>                  dm->cached_dc_state = dc_copy_state(dm->dc->current_state);
>>
>> @@ -1844,7 +1844,7 @@ static int dm_resume(void *handle)
>>          struct dc_state *dc_state;
>>          int i, r, j;
>>
>> -       if (amdgpu_in_reset(adev)) {
>> +       if (adev->in_gpu_reset) {
>>                  dc_state = dm->cached_dc_state;
>>
>>                  r = dm_dmub_hw_init(adev);
>> diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
>> index 1ffacc712e53..c8e30d59e658 100644
>> --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
>> +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
>> @@ -1110,7 +1110,7 @@ static int smu_disable_dpms(struct smu_context *smu)
>>          struct amdgpu_device *adev = smu->adev;
>>          int ret = 0;
>>          bool use_baco = !smu->is_apu &&
>> -               ((amdgpu_in_reset(adev) &&
>> +               ((adev->in_gpu_reset &&
>>                    (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)) ||
>>                   ((adev->in_runpm || adev->in_hibernate) && amdgpu_asic_supports_baco(adev)));
>>
>> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
>> index da84012b7fd5..c7216362b68d 100644
>> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
>> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
>> @@ -489,7 +489,7 @@ static int vega20_setup_asic_task(struct pp_hwmgr *hwmgr)
>>   {
>>          struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
>>          int ret = 0;
>> -       bool use_baco = (amdgpu_in_reset(adev) &&
>> +       bool use_baco = (adev->in_gpu_reset &&
>>                           (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)) ||
>>                  (adev->in_runpm && amdgpu_asic_supports_baco(adev));
>>
>> --
>> 2.17.1
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] drm/amdgpu: revert "fix system hang issue during GPU reset"
  2020-08-12 15:55 ` Alex Deucher
  2020-08-13 10:58   ` Christian König
@ 2020-08-14  1:23   ` Matt Coffin
  1 sibling, 0 replies; 4+ messages in thread
From: Matt Coffin @ 2020-08-14  1:23 UTC (permalink / raw)
  To: Alex Deucher, Christian König; +Cc: amd-gfx list

On 8/12/20 9:55 AM, Alex Deucher wrote:
> This also broke GPU overclocking.

The fix for the `pp_od_clk_voltage` interface was actually quite simple,
and the bug was limited to that function. The patch just messed up the
return value (which was supposed to be the number of bytes consumed). It was
just returning 0 on success, which caused the userspace client to keep
on trying to send the data.

I suck at email lists, and don't know how to link it here, but I
submitted a patch just now to repair it, if that's the only blocker on this.

Thanks for the work, guys,
Matt
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-08-14  1:23 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-12 15:53 [PATCH] drm/amdgpu: revert "fix system hang issue during GPU reset" Christian König
2020-08-12 15:55 ` Alex Deucher
2020-08-13 10:58   ` Christian König
2020-08-14  1:23   ` Matt Coffin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.