All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu: restrict debugfs register access under SR-IOV
@ 2020-04-09  6:01 Yintian Tao
  2020-04-09 12:41 ` Christian König
  0 siblings, 1 reply; 8+ messages in thread
From: Yintian Tao @ 2020-04-09  6:01 UTC (permalink / raw)
  To: christian.koenig, Alexander.Deucher, emily.deng; +Cc: amd-gfx, Yintian Tao

Under bare metal, there is no more else to take
care of the GPU register access through MMIO.
Under Virtualization, to access GPU register is
implemented through KIQ during run-time due to
world-switch.

Therefore, under SR-IOV user can only access
debugfs to r/w GPU registers when meets all
three conditions below.
- amdgpu_gpu_recovery=0
- TDR happened
- in_gpu_reset=0

Signed-off-by: Yintian Tao <yttao@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 83 ++++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c     |  7 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c    | 23 ++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h    |  7 ++
 4 files changed, 114 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index c0f9a651dc06..4f9780aabf5a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -152,11 +152,17 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
 	if (r < 0)
 		return r;
 
+	if (!amdgpu_virt_can_access_debugfs(adev))
+		return -EINVAL;
+	else
+		amdgpu_virt_enable_access_debugfs(adev);
+
 	if (use_bank) {
 		if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
 		    (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) {
 			pm_runtime_mark_last_busy(adev->ddev->dev);
 			pm_runtime_put_autosuspend(adev->ddev->dev);
+			amdgpu_virt_disable_access_debugfs(adev);
 			return -EINVAL;
 		}
 		mutex_lock(&adev->grbm_idx_mutex);
@@ -207,6 +213,7 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
+	amdgpu_virt_disable_access_debugfs(adev);
 	return result;
 }
 
@@ -255,6 +262,11 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
 	if (r < 0)
 		return r;
 
+	if (!amdgpu_virt_can_access_debugfs(adev))
+		return -EINVAL;
+	else
+		amdgpu_virt_enable_access_debugfs(adev);
+
 	while (size) {
 		uint32_t value;
 
@@ -263,6 +275,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
 		if (r) {
 			pm_runtime_mark_last_busy(adev->ddev->dev);
 			pm_runtime_put_autosuspend(adev->ddev->dev);
+			amdgpu_virt_disable_access_debugfs(adev);
 			return r;
 		}
 
@@ -275,6 +288,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
+	amdgpu_virt_disable_access_debugfs(adev);
 	return result;
 }
 
@@ -304,6 +318,11 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
 	if (r < 0)
 		return r;
 
+	if (!amdgpu_virt_can_access_debugfs(adev))
+		return -EINVAL;
+	else
+		amdgpu_virt_enable_access_debugfs(adev);
+
 	while (size) {
 		uint32_t value;
 
@@ -311,6 +330,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
 		if (r) {
 			pm_runtime_mark_last_busy(adev->ddev->dev);
 			pm_runtime_put_autosuspend(adev->ddev->dev);
+			amdgpu_virt_disable_access_debugfs(adev);
 			return r;
 		}
 
@@ -325,6 +345,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
+	amdgpu_virt_disable_access_debugfs(adev);
 	return result;
 }
 
@@ -354,6 +375,11 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
 	if (r < 0)
 		return r;
 
+	if (!amdgpu_virt_can_access_debugfs(adev))
+		return -EINVAL;
+	else
+		amdgpu_virt_enable_access_debugfs(adev);
+
 	while (size) {
 		uint32_t value;
 
@@ -362,6 +388,7 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
 		if (r) {
 			pm_runtime_mark_last_busy(adev->ddev->dev);
 			pm_runtime_put_autosuspend(adev->ddev->dev);
+			amdgpu_virt_disable_access_debugfs(adev);
 			return r;
 		}
 
@@ -374,6 +401,7 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
+	amdgpu_virt_disable_access_debugfs(adev);
 	return result;
 }
 
@@ -403,6 +431,11 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
 	if (r < 0)
 		return r;
 
+	if (!amdgpu_virt_can_access_debugfs(adev))
+		return -EINVAL;
+	else
+		amdgpu_virt_enable_access_debugfs(adev);
+
 	while (size) {
 		uint32_t value;
 
@@ -410,6 +443,7 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
 		if (r) {
 			pm_runtime_mark_last_busy(adev->ddev->dev);
 			pm_runtime_put_autosuspend(adev->ddev->dev);
+			amdgpu_virt_disable_access_debugfs(adev);
 			return r;
 		}
 
@@ -424,6 +458,7 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
+	amdgpu_virt_disable_access_debugfs(adev);
 	return result;
 }
 
@@ -453,6 +488,11 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
 	if (r < 0)
 		return r;
 
+	if (!amdgpu_virt_can_access_debugfs(adev))
+		return -EINVAL;
+	else
+		amdgpu_virt_enable_access_debugfs(adev);
+
 	while (size) {
 		uint32_t value;
 
@@ -461,6 +501,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
 		if (r) {
 			pm_runtime_mark_last_busy(adev->ddev->dev);
 			pm_runtime_put_autosuspend(adev->ddev->dev);
+			amdgpu_virt_disable_access_debugfs(adev);
 			return r;
 		}
 
@@ -473,6 +514,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
+	amdgpu_virt_disable_access_debugfs(adev);
 	return result;
 }
 
@@ -502,6 +544,11 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
 	if (r < 0)
 		return r;
 
+	if (!amdgpu_virt_can_access_debugfs(adev))
+		return -EINVAL;
+	else
+		amdgpu_virt_enable_access_debugfs(adev);
+
 	while (size) {
 		uint32_t value;
 
@@ -509,6 +556,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
 		if (r) {
 			pm_runtime_mark_last_busy(adev->ddev->dev);
 			pm_runtime_put_autosuspend(adev->ddev->dev);
+			amdgpu_virt_disable_access_debugfs(adev);
 			return r;
 		}
 
@@ -523,6 +571,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
+	amdgpu_virt_disable_access_debugfs(adev);
 	return result;
 }
 
@@ -651,16 +700,25 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
 	if (r < 0)
 		return r;
 
+	if (!amdgpu_virt_can_access_debugfs(adev))
+		return -EINVAL;
+	else
+		amdgpu_virt_enable_access_debugfs(adev);
+
 	r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
 
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
-	if (r)
+	if (r) {
+		amdgpu_virt_disable_access_debugfs(adev);
 		return r;
+	}
 
-	if (size > valuesize)
+	if (size > valuesize) {
+		amdgpu_virt_disable_access_debugfs(adev);
 		return -EINVAL;
+	}
 
 	outsize = 0;
 	x = 0;
@@ -673,6 +731,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
 		}
 	}
 
+	amdgpu_virt_disable_access_debugfs(adev);
 	return !r ? outsize : r;
 }
 
@@ -720,6 +779,11 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
 	if (r < 0)
 		return r;
 
+	if (!amdgpu_virt_can_access_debugfs(adev))
+		return -EINVAL;
+	else
+		amdgpu_virt_enable_access_debugfs(adev);
+
 	/* switch to the specific se/sh/cu */
 	mutex_lock(&adev->grbm_idx_mutex);
 	amdgpu_gfx_select_se_sh(adev, se, sh, cu);
@@ -734,16 +798,20 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
-	if (!x)
+	if (!x) {
+		amdgpu_virt_disable_access_debugfs(adev);
 		return -EINVAL;
+	}
 
 	while (size && (offset < x * 4)) {
 		uint32_t value;
 
 		value = data[offset >> 2];
 		r = put_user(value, (uint32_t *)buf);
-		if (r)
+		if (r) {
+			amdgpu_virt_disable_access_debugfs(adev);
 			return r;
+		}
 
 		result += 4;
 		buf += 4;
@@ -751,6 +819,7 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
 		size -= 4;
 	}
 
+	amdgpu_virt_disable_access_debugfs(adev);
 	return result;
 }
 
@@ -805,6 +874,11 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
 	if (r < 0)
 		return r;
 
+	if (!amdgpu_virt_can_access_debugfs(adev))
+		return -EINVAL;
+	else
+		amdgpu_virt_enable_access_debugfs(adev);
+
 	/* switch to the specific se/sh/cu */
 	mutex_lock(&adev->grbm_idx_mutex);
 	amdgpu_gfx_select_se_sh(adev, se, sh, cu);
@@ -840,6 +914,7 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
 
 err:
 	kfree(data);
+	amdgpu_virt_disable_access_debugfs(adev);
 	return result;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 2b99f5952375..993b75dde5d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -33,6 +33,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
 	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
 	struct amdgpu_job *job = to_amdgpu_job(s_job);
 	struct amdgpu_task_info ti;
+	struct amdgpu_device *adev = ring->adev;
 
 	memset(&ti, 0, sizeof(struct amdgpu_task_info));
 
@@ -49,10 +50,12 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
 	DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
 		  ti.process_name, ti.tgid, ti.task_name, ti.pid);
 
-	if (amdgpu_device_should_recover_gpu(ring->adev))
+	if (amdgpu_device_should_recover_gpu(ring->adev)) {
 		amdgpu_device_gpu_recover(ring->adev, job);
-	else
+	} else {
 		drm_sched_suspend_timeout(&ring->sched);
+		adev->virt.tdr_debug = true;
+	}
 }
 
 int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 4d06c79065bf..d0dfe99ebc75 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -334,3 +334,26 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
 			adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
 	}
 }
+
+bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev)
+{
+	if (!amdgpu_sriov_vf(adev))
+		return true;
+
+	if (amdgpu_sriov_is_debug(adev))
+		return true;
+
+	return false;
+}
+
+void amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev)
+{
+	if (amdgpu_sriov_vf(adev))
+		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
+}
+
+void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev)
+{
+	if (amdgpu_sriov_vf(adev))
+		adev->virt.caps |= AMDGPU_SRIOV_CAPS_RUNTIME;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index f6ae3c656304..a01742b7bf12 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -265,6 +265,7 @@ struct amdgpu_virt {
 	uint32_t gim_feature;
 	uint32_t reg_access_mode;
 	int req_init_data_ver;
+	bool tdr_debug;
 };
 
 #define amdgpu_sriov_enabled(adev) \
@@ -296,6 +297,8 @@ static inline bool is_virtual_machine(void)
 
 #define amdgpu_sriov_is_pp_one_vf(adev) \
 	((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF)
+#define amdgpu_sriov_is_debug(adev) \
+	((!adev->in_gpu_reset) && adev->virt.tdr_debug)
 
 bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
 void amdgpu_virt_init_setting(struct amdgpu_device *adev);
@@ -314,4 +317,8 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size,
 					unsigned int chksum);
 void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
 void amdgpu_detect_virtualization(struct amdgpu_device *adev);
+
+bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
+void amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
+void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev);
 #endif
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] drm/amdgpu: restrict debugfs register access under SR-IOV
  2020-04-09  6:01 [PATCH] drm/amdgpu: restrict debugfs register access under SR-IOV Yintian Tao
@ 2020-04-09 12:41 ` Christian König
  2020-04-09 13:42   ` Tao, Yintian
  0 siblings, 1 reply; 8+ messages in thread
From: Christian König @ 2020-04-09 12:41 UTC (permalink / raw)
  To: Yintian Tao, Alexander.Deucher, emily.deng; +Cc: amd-gfx

Am 09.04.20 um 08:01 schrieb Yintian Tao:
> Under bare metal, there is no more else to take
> care of the GPU register access through MMIO.
> Under Virtualization, to access GPU register is
> implemented through KIQ during run-time due to
> world-switch.
>
> Therefore, under SR-IOV user can only access
> debugfs to r/w GPU registers when meets all
> three conditions below.
> - amdgpu_gpu_recovery=0
> - TDR happened
> - in_gpu_reset=0
>
> Signed-off-by: Yintian Tao <yttao@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 83 ++++++++++++++++++++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c     |  7 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c    | 23 ++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h    |  7 ++
>   4 files changed, 114 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index c0f9a651dc06..4f9780aabf5a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -152,11 +152,17 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
>   	if (r < 0)
>   		return r;
>   
> +	if (!amdgpu_virt_can_access_debugfs(adev))
> +		return -EINVAL;
> +	else
> +		amdgpu_virt_enable_access_debugfs(adev);
> +

It would be better to merge these two functions together.

E.g. that amdgpu_virt_enable_access_debugfs() returns an error if we 
can't allow this.

And -EINVAL is maybe not the right thing here, since this is not caused 
by an invalid value.

Maybe use -EPERM instead.

Regards,
Christian.

>   	if (use_bank) {
>   		if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
>   		    (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) {
>   			pm_runtime_mark_last_busy(adev->ddev->dev);
>   			pm_runtime_put_autosuspend(adev->ddev->dev);
> +			amdgpu_virt_disable_access_debugfs(adev);
>   			return -EINVAL;
>   		}
>   		mutex_lock(&adev->grbm_idx_mutex);
> @@ -207,6 +213,7 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
>   	pm_runtime_mark_last_busy(adev->ddev->dev);
>   	pm_runtime_put_autosuspend(adev->ddev->dev);
>   
> +	amdgpu_virt_disable_access_debugfs(adev);
>   	return result;
>   }
>   
> @@ -255,6 +262,11 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
>   	if (r < 0)
>   		return r;
>   
> +	if (!amdgpu_virt_can_access_debugfs(adev))
> +		return -EINVAL;
> +	else
> +		amdgpu_virt_enable_access_debugfs(adev);
> +
>   	while (size) {
>   		uint32_t value;
>   
> @@ -263,6 +275,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
>   		if (r) {
>   			pm_runtime_mark_last_busy(adev->ddev->dev);
>   			pm_runtime_put_autosuspend(adev->ddev->dev);
> +			amdgpu_virt_disable_access_debugfs(adev);
>   			return r;
>   		}
>   
> @@ -275,6 +288,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
>   	pm_runtime_mark_last_busy(adev->ddev->dev);
>   	pm_runtime_put_autosuspend(adev->ddev->dev);
>   
> +	amdgpu_virt_disable_access_debugfs(adev);
>   	return result;
>   }
>   
> @@ -304,6 +318,11 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
>   	if (r < 0)
>   		return r;
>   
> +	if (!amdgpu_virt_can_access_debugfs(adev))
> +		return -EINVAL;
> +	else
> +		amdgpu_virt_enable_access_debugfs(adev);
> +
>   	while (size) {
>   		uint32_t value;
>   
> @@ -311,6 +330,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
>   		if (r) {
>   			pm_runtime_mark_last_busy(adev->ddev->dev);
>   			pm_runtime_put_autosuspend(adev->ddev->dev);
> +			amdgpu_virt_disable_access_debugfs(adev);
>   			return r;
>   		}
>   
> @@ -325,6 +345,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
>   	pm_runtime_mark_last_busy(adev->ddev->dev);
>   	pm_runtime_put_autosuspend(adev->ddev->dev);
>   
> +	amdgpu_virt_disable_access_debugfs(adev);
>   	return result;
>   }
>   
> @@ -354,6 +375,11 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
>   	if (r < 0)
>   		return r;
>   
> +	if (!amdgpu_virt_can_access_debugfs(adev))
> +		return -EINVAL;
> +	else
> +		amdgpu_virt_enable_access_debugfs(adev);
> +
>   	while (size) {
>   		uint32_t value;
>   
> @@ -362,6 +388,7 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
>   		if (r) {
>   			pm_runtime_mark_last_busy(adev->ddev->dev);
>   			pm_runtime_put_autosuspend(adev->ddev->dev);
> +			amdgpu_virt_disable_access_debugfs(adev);
>   			return r;
>   		}
>   
> @@ -374,6 +401,7 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
>   	pm_runtime_mark_last_busy(adev->ddev->dev);
>   	pm_runtime_put_autosuspend(adev->ddev->dev);
>   
> +	amdgpu_virt_disable_access_debugfs(adev);
>   	return result;
>   }
>   
> @@ -403,6 +431,11 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
>   	if (r < 0)
>   		return r;
>   
> +	if (!amdgpu_virt_can_access_debugfs(adev))
> +		return -EINVAL;
> +	else
> +		amdgpu_virt_enable_access_debugfs(adev);
> +
>   	while (size) {
>   		uint32_t value;
>   
> @@ -410,6 +443,7 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
>   		if (r) {
>   			pm_runtime_mark_last_busy(adev->ddev->dev);
>   			pm_runtime_put_autosuspend(adev->ddev->dev);
> +			amdgpu_virt_disable_access_debugfs(adev);
>   			return r;
>   		}
>   
> @@ -424,6 +458,7 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
>   	pm_runtime_mark_last_busy(adev->ddev->dev);
>   	pm_runtime_put_autosuspend(adev->ddev->dev);
>   
> +	amdgpu_virt_disable_access_debugfs(adev);
>   	return result;
>   }
>   
> @@ -453,6 +488,11 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
>   	if (r < 0)
>   		return r;
>   
> +	if (!amdgpu_virt_can_access_debugfs(adev))
> +		return -EINVAL;
> +	else
> +		amdgpu_virt_enable_access_debugfs(adev);
> +
>   	while (size) {
>   		uint32_t value;
>   
> @@ -461,6 +501,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
>   		if (r) {
>   			pm_runtime_mark_last_busy(adev->ddev->dev);
>   			pm_runtime_put_autosuspend(adev->ddev->dev);
> +			amdgpu_virt_disable_access_debugfs(adev);
>   			return r;
>   		}
>   
> @@ -473,6 +514,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
>   	pm_runtime_mark_last_busy(adev->ddev->dev);
>   	pm_runtime_put_autosuspend(adev->ddev->dev);
>   
> +	amdgpu_virt_disable_access_debugfs(adev);
>   	return result;
>   }
>   
> @@ -502,6 +544,11 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
>   	if (r < 0)
>   		return r;
>   
> +	if (!amdgpu_virt_can_access_debugfs(adev))
> +		return -EINVAL;
> +	else
> +		amdgpu_virt_enable_access_debugfs(adev);
> +
>   	while (size) {
>   		uint32_t value;
>   
> @@ -509,6 +556,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
>   		if (r) {
>   			pm_runtime_mark_last_busy(adev->ddev->dev);
>   			pm_runtime_put_autosuspend(adev->ddev->dev);
> +			amdgpu_virt_disable_access_debugfs(adev);
>   			return r;
>   		}
>   
> @@ -523,6 +571,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
>   	pm_runtime_mark_last_busy(adev->ddev->dev);
>   	pm_runtime_put_autosuspend(adev->ddev->dev);
>   
> +	amdgpu_virt_disable_access_debugfs(adev);
>   	return result;
>   }
>   
> @@ -651,16 +700,25 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
>   	if (r < 0)
>   		return r;
>   
> +	if (!amdgpu_virt_can_access_debugfs(adev))
> +		return -EINVAL;
> +	else
> +		amdgpu_virt_enable_access_debugfs(adev);
> +
>   	r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
>   
>   	pm_runtime_mark_last_busy(adev->ddev->dev);
>   	pm_runtime_put_autosuspend(adev->ddev->dev);
>   
> -	if (r)
> +	if (r) {
> +		amdgpu_virt_disable_access_debugfs(adev);
>   		return r;
> +	}
>   
> -	if (size > valuesize)
> +	if (size > valuesize) {
> +		amdgpu_virt_disable_access_debugfs(adev);
>   		return -EINVAL;
> +	}
>   
>   	outsize = 0;
>   	x = 0;
> @@ -673,6 +731,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
>   		}
>   	}
>   
> +	amdgpu_virt_disable_access_debugfs(adev);
>   	return !r ? outsize : r;
>   }
>   
> @@ -720,6 +779,11 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
>   	if (r < 0)
>   		return r;
>   
> +	if (!amdgpu_virt_can_access_debugfs(adev))
> +		return -EINVAL;
> +	else
> +		amdgpu_virt_enable_access_debugfs(adev);
> +
>   	/* switch to the specific se/sh/cu */
>   	mutex_lock(&adev->grbm_idx_mutex);
>   	amdgpu_gfx_select_se_sh(adev, se, sh, cu);
> @@ -734,16 +798,20 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
>   	pm_runtime_mark_last_busy(adev->ddev->dev);
>   	pm_runtime_put_autosuspend(adev->ddev->dev);
>   
> -	if (!x)
> +	if (!x) {
> +		amdgpu_virt_disable_access_debugfs(adev);
>   		return -EINVAL;
> +	}
>   
>   	while (size && (offset < x * 4)) {
>   		uint32_t value;
>   
>   		value = data[offset >> 2];
>   		r = put_user(value, (uint32_t *)buf);
> -		if (r)
> +		if (r) {
> +			amdgpu_virt_disable_access_debugfs(adev);
>   			return r;
> +		}
>   
>   		result += 4;
>   		buf += 4;
> @@ -751,6 +819,7 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
>   		size -= 4;
>   	}
>   
> +	amdgpu_virt_disable_access_debugfs(adev);
>   	return result;
>   }
>   
> @@ -805,6 +874,11 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
>   	if (r < 0)
>   		return r;
>   
> +	if (!amdgpu_virt_can_access_debugfs(adev))
> +		return -EINVAL;
> +	else
> +		amdgpu_virt_enable_access_debugfs(adev);
> +
>   	/* switch to the specific se/sh/cu */
>   	mutex_lock(&adev->grbm_idx_mutex);
>   	amdgpu_gfx_select_se_sh(adev, se, sh, cu);
> @@ -840,6 +914,7 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
>   
>   err:
>   	kfree(data);
> +	amdgpu_virt_disable_access_debugfs(adev);
>   	return result;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 2b99f5952375..993b75dde5d2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -33,6 +33,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
>   	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
>   	struct amdgpu_job *job = to_amdgpu_job(s_job);
>   	struct amdgpu_task_info ti;
> +	struct amdgpu_device *adev = ring->adev;
>   
>   	memset(&ti, 0, sizeof(struct amdgpu_task_info));
>   
> @@ -49,10 +50,12 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
>   	DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
>   		  ti.process_name, ti.tgid, ti.task_name, ti.pid);
>   
> -	if (amdgpu_device_should_recover_gpu(ring->adev))
> +	if (amdgpu_device_should_recover_gpu(ring->adev)) {
>   		amdgpu_device_gpu_recover(ring->adev, job);
> -	else
> +	} else {
>   		drm_sched_suspend_timeout(&ring->sched);
> +		adev->virt.tdr_debug = true;
> +	}
>   }
>   
>   int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index 4d06c79065bf..d0dfe99ebc75 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -334,3 +334,26 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
>   			adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
>   	}
>   }
> +
> +bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev)
> +{
> +	if (!amdgpu_sriov_vf(adev))
> +		return true;
> +
> +	if (amdgpu_sriov_is_debug(adev))
> +		return true;
> +
> +	return false;
> +}
> +
> +void amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev)
> +{
> +	if (amdgpu_sriov_vf(adev))
> +		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
> +}
> +
> +void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev)
> +{
> +	if (amdgpu_sriov_vf(adev))
> +		adev->virt.caps |= AMDGPU_SRIOV_CAPS_RUNTIME;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index f6ae3c656304..a01742b7bf12 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -265,6 +265,7 @@ struct amdgpu_virt {
>   	uint32_t gim_feature;
>   	uint32_t reg_access_mode;
>   	int req_init_data_ver;
> +	bool tdr_debug;
>   };
>   
>   #define amdgpu_sriov_enabled(adev) \
> @@ -296,6 +297,8 @@ static inline bool is_virtual_machine(void)
>   
>   #define amdgpu_sriov_is_pp_one_vf(adev) \
>   	((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF)
> +#define amdgpu_sriov_is_debug(adev) \
> +	((!adev->in_gpu_reset) && adev->virt.tdr_debug)
>   
>   bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
>   void amdgpu_virt_init_setting(struct amdgpu_device *adev);
> @@ -314,4 +317,8 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size,
>   					unsigned int chksum);
>   void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
>   void amdgpu_detect_virtualization(struct amdgpu_device *adev);
> +
> +bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
> +void amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
> +void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev);
>   #endif

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH] drm/amdgpu: restrict debugfs register access under SR-IOV
  2020-04-09 12:41 ` Christian König
@ 2020-04-09 13:42   ` Tao, Yintian
  0 siblings, 0 replies; 8+ messages in thread
From: Tao, Yintian @ 2020-04-09 13:42 UTC (permalink / raw)
  To: Koenig, Christian, Deucher, Alexander, Deng, Emily; +Cc: amd-gfx

Hi  Christian


Many thanks for your review. I will submit one new patch according to your suggestion.


Best Regards
Yintian Tao
-----Original Message-----
From: Koenig, Christian <Christian.Koenig@amd.com> 
Sent: 2020年4月9日 20:42
To: Tao, Yintian <Yintian.Tao@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Deng, Emily <Emily.Deng@amd.com>
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: restrict debugfs register access under SR-IOV

Am 09.04.20 um 08:01 schrieb Yintian Tao:
> Under bare metal, there is no more else to take care of the GPU 
> register access through MMIO.
> Under Virtualization, to access GPU register is implemented through 
> KIQ during run-time due to world-switch.
>
> Therefore, under SR-IOV user can only access debugfs to r/w GPU 
> registers when meets all three conditions below.
> - amdgpu_gpu_recovery=0
> - TDR happened
> - in_gpu_reset=0
>
> Signed-off-by: Yintian Tao <yttao@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 83 ++++++++++++++++++++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c     |  7 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c    | 23 ++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h    |  7 ++
>   4 files changed, 114 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index c0f9a651dc06..4f9780aabf5a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -152,11 +152,17 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
>   	if (r < 0)
>   		return r;
>   
> +	if (!amdgpu_virt_can_access_debugfs(adev))
> +		return -EINVAL;
> +	else
> +		amdgpu_virt_enable_access_debugfs(adev);
> +

It would be better to merge these two functions together.

E.g. that amdgpu_virt_enable_access_debugfs() returns an error if we can't allow this.

And -EINVAL is maybe not the right thing here, since this is not caused by an invalid value.

Maybe use -EPERM instead.

Regards,
Christian.

>   	if (use_bank) {
>   		if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
>   		    (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) {
>   			pm_runtime_mark_last_busy(adev->ddev->dev);
>   			pm_runtime_put_autosuspend(adev->ddev->dev);
> +			amdgpu_virt_disable_access_debugfs(adev);
>   			return -EINVAL;
>   		}
>   		mutex_lock(&adev->grbm_idx_mutex);
> @@ -207,6 +213,7 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
>   	pm_runtime_mark_last_busy(adev->ddev->dev);
>   	pm_runtime_put_autosuspend(adev->ddev->dev);
>   
> +	amdgpu_virt_disable_access_debugfs(adev);
>   	return result;
>   }
>   
> @@ -255,6 +262,11 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
>   	if (r < 0)
>   		return r;
>   
> +	if (!amdgpu_virt_can_access_debugfs(adev))
> +		return -EINVAL;
> +	else
> +		amdgpu_virt_enable_access_debugfs(adev);
> +
>   	while (size) {
>   		uint32_t value;
>   
> @@ -263,6 +275,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
>   		if (r) {
>   			pm_runtime_mark_last_busy(adev->ddev->dev);
>   			pm_runtime_put_autosuspend(adev->ddev->dev);
> +			amdgpu_virt_disable_access_debugfs(adev);
>   			return r;
>   		}
>   
> @@ -275,6 +288,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
>   	pm_runtime_mark_last_busy(adev->ddev->dev);
>   	pm_runtime_put_autosuspend(adev->ddev->dev);
>   
> +	amdgpu_virt_disable_access_debugfs(adev);
>   	return result;
>   }
>   
> @@ -304,6 +318,11 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
>   	if (r < 0)
>   		return r;
>   
> +	if (!amdgpu_virt_can_access_debugfs(adev))
> +		return -EINVAL;
> +	else
> +		amdgpu_virt_enable_access_debugfs(adev);
> +
>   	while (size) {
>   		uint32_t value;
>   
> @@ -311,6 +330,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
>   		if (r) {
>   			pm_runtime_mark_last_busy(adev->ddev->dev);
>   			pm_runtime_put_autosuspend(adev->ddev->dev);
> +			amdgpu_virt_disable_access_debugfs(adev);
>   			return r;
>   		}
>   
> @@ -325,6 +345,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
>   	pm_runtime_mark_last_busy(adev->ddev->dev);
>   	pm_runtime_put_autosuspend(adev->ddev->dev);
>   
> +	amdgpu_virt_disable_access_debugfs(adev);
>   	return result;
>   }
>   
> @@ -354,6 +375,11 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
>   	if (r < 0)
>   		return r;
>   
> +	if (!amdgpu_virt_can_access_debugfs(adev))
> +		return -EINVAL;
> +	else
> +		amdgpu_virt_enable_access_debugfs(adev);
> +
>   	while (size) {
>   		uint32_t value;
>   
> @@ -362,6 +388,7 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
>   		if (r) {
>   			pm_runtime_mark_last_busy(adev->ddev->dev);
>   			pm_runtime_put_autosuspend(adev->ddev->dev);
> +			amdgpu_virt_disable_access_debugfs(adev);
>   			return r;
>   		}
>   
> @@ -374,6 +401,7 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
>   	pm_runtime_mark_last_busy(adev->ddev->dev);
>   	pm_runtime_put_autosuspend(adev->ddev->dev);
>   
> +	amdgpu_virt_disable_access_debugfs(adev);
>   	return result;
>   }
>   
> @@ -403,6 +431,11 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
>   	if (r < 0)
>   		return r;
>   
> +	if (!amdgpu_virt_can_access_debugfs(adev))
> +		return -EINVAL;
> +	else
> +		amdgpu_virt_enable_access_debugfs(adev);
> +
>   	while (size) {
>   		uint32_t value;
>   
> @@ -410,6 +443,7 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
>   		if (r) {
>   			pm_runtime_mark_last_busy(adev->ddev->dev);
>   			pm_runtime_put_autosuspend(adev->ddev->dev);
> +			amdgpu_virt_disable_access_debugfs(adev);
>   			return r;
>   		}
>   
> @@ -424,6 +458,7 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
>   	pm_runtime_mark_last_busy(adev->ddev->dev);
>   	pm_runtime_put_autosuspend(adev->ddev->dev);
>   
> +	amdgpu_virt_disable_access_debugfs(adev);
>   	return result;
>   }
>   
> @@ -453,6 +488,11 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
>   	if (r < 0)
>   		return r;
>   
> +	if (!amdgpu_virt_can_access_debugfs(adev))
> +		return -EINVAL;
> +	else
> +		amdgpu_virt_enable_access_debugfs(adev);
> +
>   	while (size) {
>   		uint32_t value;
>   
> @@ -461,6 +501,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
>   		if (r) {
>   			pm_runtime_mark_last_busy(adev->ddev->dev);
>   			pm_runtime_put_autosuspend(adev->ddev->dev);
> +			amdgpu_virt_disable_access_debugfs(adev);
>   			return r;
>   		}
>   
> @@ -473,6 +514,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
>   	pm_runtime_mark_last_busy(adev->ddev->dev);
>   	pm_runtime_put_autosuspend(adev->ddev->dev);
>   
> +	amdgpu_virt_disable_access_debugfs(adev);
>   	return result;
>   }
>   
> @@ -502,6 +544,11 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
>   	if (r < 0)
>   		return r;
>   
> +	if (!amdgpu_virt_can_access_debugfs(adev))
> +		return -EINVAL;
> +	else
> +		amdgpu_virt_enable_access_debugfs(adev);
> +
>   	while (size) {
>   		uint32_t value;
>   
> @@ -509,6 +556,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
>   		if (r) {
>   			pm_runtime_mark_last_busy(adev->ddev->dev);
>   			pm_runtime_put_autosuspend(adev->ddev->dev);
> +			amdgpu_virt_disable_access_debugfs(adev);
>   			return r;
>   		}
>   
> @@ -523,6 +571,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
>   	pm_runtime_mark_last_busy(adev->ddev->dev);
>   	pm_runtime_put_autosuspend(adev->ddev->dev);
>   
> +	amdgpu_virt_disable_access_debugfs(adev);
>   	return result;
>   }
>   
> @@ -651,16 +700,25 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
>   	if (r < 0)
>   		return r;
>   
> +	if (!amdgpu_virt_can_access_debugfs(adev))
> +		return -EINVAL;
> +	else
> +		amdgpu_virt_enable_access_debugfs(adev);
> +
>   	r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
>   
>   	pm_runtime_mark_last_busy(adev->ddev->dev);
>   	pm_runtime_put_autosuspend(adev->ddev->dev);
>   
> -	if (r)
> +	if (r) {
> +		amdgpu_virt_disable_access_debugfs(adev);
>   		return r;
> +	}
>   
> -	if (size > valuesize)
> +	if (size > valuesize) {
> +		amdgpu_virt_disable_access_debugfs(adev);
>   		return -EINVAL;
> +	}
>   
>   	outsize = 0;
>   	x = 0;
> @@ -673,6 +731,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
>   		}
>   	}
>   
> +	amdgpu_virt_disable_access_debugfs(adev);
>   	return !r ? outsize : r;
>   }
>   
> @@ -720,6 +779,11 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
>   	if (r < 0)
>   		return r;
>   
> +	if (!amdgpu_virt_can_access_debugfs(adev))
> +		return -EINVAL;
> +	else
> +		amdgpu_virt_enable_access_debugfs(adev);
> +
>   	/* switch to the specific se/sh/cu */
>   	mutex_lock(&adev->grbm_idx_mutex);
>   	amdgpu_gfx_select_se_sh(adev, se, sh, cu); @@ -734,16 +798,20 @@ 
> static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
>   	pm_runtime_mark_last_busy(adev->ddev->dev);
>   	pm_runtime_put_autosuspend(adev->ddev->dev);
>   
> -	if (!x)
> +	if (!x) {
> +		amdgpu_virt_disable_access_debugfs(adev);
>   		return -EINVAL;
> +	}
>   
>   	while (size && (offset < x * 4)) {
>   		uint32_t value;
>   
>   		value = data[offset >> 2];
>   		r = put_user(value, (uint32_t *)buf);
> -		if (r)
> +		if (r) {
> +			amdgpu_virt_disable_access_debugfs(adev);
>   			return r;
> +		}
>   
>   		result += 4;
>   		buf += 4;
> @@ -751,6 +819,7 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
>   		size -= 4;
>   	}
>   
> +	amdgpu_virt_disable_access_debugfs(adev);
>   	return result;
>   }
>   
> @@ -805,6 +874,11 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
>   	if (r < 0)
>   		return r;
>   
> +	if (!amdgpu_virt_can_access_debugfs(adev))
> +		return -EINVAL;
> +	else
> +		amdgpu_virt_enable_access_debugfs(adev);
> +
>   	/* switch to the specific se/sh/cu */
>   	mutex_lock(&adev->grbm_idx_mutex);
>   	amdgpu_gfx_select_se_sh(adev, se, sh, cu); @@ -840,6 +914,7 @@ 
> static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user 
> *buf,
>   
>   err:
>   	kfree(data);
> +	amdgpu_virt_disable_access_debugfs(adev);
>   	return result;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 2b99f5952375..993b75dde5d2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -33,6 +33,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
>   	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
>   	struct amdgpu_job *job = to_amdgpu_job(s_job);
>   	struct amdgpu_task_info ti;
> +	struct amdgpu_device *adev = ring->adev;
>   
>   	memset(&ti, 0, sizeof(struct amdgpu_task_info));
>   
> @@ -49,10 +50,12 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
>   	DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
>   		  ti.process_name, ti.tgid, ti.task_name, ti.pid);
>   
> -	if (amdgpu_device_should_recover_gpu(ring->adev))
> +	if (amdgpu_device_should_recover_gpu(ring->adev)) {
>   		amdgpu_device_gpu_recover(ring->adev, job);
> -	else
> +	} else {
>   		drm_sched_suspend_timeout(&ring->sched);
> +		adev->virt.tdr_debug = true;
> +	}
>   }
>   
>   int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index 4d06c79065bf..d0dfe99ebc75 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -334,3 +334,26 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
>   			adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
>   	}
>   }
> +
> +bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev) {
> +	if (!amdgpu_sriov_vf(adev))
> +		return true;
> +
> +	if (amdgpu_sriov_is_debug(adev))
> +		return true;
> +
> +	return false;
> +}
> +
> +void amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev) {
> +	if (amdgpu_sriov_vf(adev))
> +		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME; }
> +
> +void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev) {
> +	if (amdgpu_sriov_vf(adev))
> +		adev->virt.caps |= AMDGPU_SRIOV_CAPS_RUNTIME; }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index f6ae3c656304..a01742b7bf12 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -265,6 +265,7 @@ struct amdgpu_virt {
>   	uint32_t gim_feature;
>   	uint32_t reg_access_mode;
>   	int req_init_data_ver;
> +	bool tdr_debug;
>   };
>   
>   #define amdgpu_sriov_enabled(adev) \ @@ -296,6 +297,8 @@ static 
> inline bool is_virtual_machine(void)
>   
>   #define amdgpu_sriov_is_pp_one_vf(adev) \
>   	((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF)
> +#define amdgpu_sriov_is_debug(adev) \
> +	((!adev->in_gpu_reset) && adev->virt.tdr_debug)
>   
>   bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
>   void amdgpu_virt_init_setting(struct amdgpu_device *adev); @@ -314,4 
> +317,8 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size,
>   					unsigned int chksum);
>   void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
>   void amdgpu_detect_virtualization(struct amdgpu_device *adev);
> +
> +bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev); void 
> +amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev); void 
> +amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev);
>   #endif

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH] drm/amdgpu: restrict debugfs register access under SR-IOV
  2020-04-11  8:58     ` Liu, Monk
@ 2020-04-13  6:52       ` Tao, Yintian
  0 siblings, 0 replies; 8+ messages in thread
From: Tao, Yintian @ 2020-04-13  6:52 UTC (permalink / raw)
  To: Liu, Monk, Alex Deucher
  Cc: Deucher, Alexander, Koenig, Christian, amd-gfx list

Hi  Monk

Thanks for your review. I have changed the code according to your comments. Please help have a review again

Best Regards
Yintian Tao

-----Original Message-----
From: Liu, Monk <Monk.Liu@amd.com> 
Sent: 2020年4月11日 16:59
To: Tao, Yintian <Yintian.Tao@amd.com>; Alex Deucher <alexdeucher@gmail.com>
Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; amd-gfx list <amd-gfx@lists.freedesktop.org>
Subject: RE: [PATCH] drm/amdgpu: restrict debugfs register access under SR-IOV

Hi Yintian

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index c0f9a651dc06..4f9780aabf5a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -152,11 +152,17 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
 	if (r < 0)
 		return r;
 
+	if (!amdgpu_virt_can_access_debugfs(adev))
+		return -EINVAL;
+	else
+		amdgpu_virt_enable_access_debugfs(adev);

You patch looks simply bail out if you are not under "debug" condition, but that looks weird: you are forbidding KIQ to do the debugfs access during non-debug condition , which is an overkill to me .

The ideal logic is :
1) When we are under "tdr_deubg" mode, we allow debugfs to handled by MMIO access, just like bare-metal
2) When we are not under "tdr_debug" mode (e.g.: no tdr triggered ) we shall still allow debugfs to work, but all the register access can go with KIQ way .

Looks you are dropping 2 totally  ... 

_____________________________________
Monk Liu|GPU Virtualization Team |AMD


-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Tao, Yintian
Sent: Thursday, April 9, 2020 11:23 PM
To: Alex Deucher <alexdeucher@gmail.com>
Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; amd-gfx list <amd-gfx@lists.freedesktop.org>
Subject: RE: [PATCH] drm/amdgpu: restrict debugfs register access under SR-IOV

Hi  Alex

Many thanks for your review.



-----Original Message-----
From: Alex Deucher <alexdeucher@gmail.com>
Sent: 2020年4月9日 23:21
To: Tao, Yintian <Yintian.Tao@amd.com>
Cc: Koenig, Christian <Christian.Koenig@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; amd-gfx list <amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH] drm/amdgpu: restrict debugfs register access under SR-IOV

On Thu, Apr 9, 2020 at 10:54 AM Yintian Tao <yttao@amd.com> wrote:
>
> Under bare metal, there is no more else to take care of the GPU 
> register access through MMIO.
> Under Virtualization, to access GPU register is implemented through 
> KIQ during run-time due to world-switch.
>
> Therefore, under SR-IOV user can only access debugfs to r/w GPU 
> registers when meets all three conditions below.
> - amdgpu_gpu_recovery=0
> - TDR happened
> - in_gpu_reset=0
>
> v2: merge amdgpu_virt_can_access_debugfs() into
>     amdgpu_virt_enable_access_debugfs()
>
> Signed-off-by: Yintian Tao <yttao@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 73 +++++++++++++++++++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c     |  8 ++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c    | 26 ++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h    |  7 ++
>  4 files changed, 108 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index c0f9a651dc06..1a4894fa3693 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -152,11 +152,16 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         if (use_bank) {
>                 if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
>                     (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return -EINVAL;
>                 }
>                 mutex_lock(&adev->grbm_idx_mutex);
> @@ -207,6 +212,7 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -255,6 +261,10 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         while (size) {
>                 uint32_t value;
>
> @@ -263,6 +273,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
>                 if (r) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
>                 }
>
> @@ -275,6 +286,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -304,6 +316,10 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         while (size) {
>                 uint32_t value;
>
> @@ -311,6 +327,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
>                 if (r) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
>                 }
>
> @@ -325,6 +342,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -354,6 +372,10 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         while (size) {
>                 uint32_t value;
>
> @@ -362,6 +384,7 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
>                 if (r) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
>                 }
>
> @@ -374,6 +397,7 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -403,6 +427,10 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         while (size) {
>                 uint32_t value;
>
> @@ -410,6 +438,7 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
>                 if (r) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
>                 }
>
> @@ -424,6 +453,7 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -453,6 +483,10 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         while (size) {
>                 uint32_t value;
>
> @@ -461,6 +495,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
>                 if (r) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
>                 }
>
> @@ -473,6 +508,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -502,6 +538,10 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         while (size) {
>                 uint32_t value;
>
> @@ -509,6 +549,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
>                 if (r) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
>                 }
>
> @@ -523,6 +564,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -651,16 +693,24 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
>
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> -       if (r)
> +       if (r) {
> +               amdgpu_virt_disable_access_debugfs(adev);
>                 return r;
> +       }
>
> -       if (size > valuesize)
> +       if (size > valuesize) {
> +               amdgpu_virt_disable_access_debugfs(adev);
>                 return -EINVAL;
> +       }
>
>         outsize = 0;
>         x = 0;
> @@ -673,6 +723,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
>                 }
>         }
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return !r ? outsize : r;
>  }
>
> @@ -720,6 +771,10 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         /* switch to the specific se/sh/cu */
>         mutex_lock(&adev->grbm_idx_mutex);
>         amdgpu_gfx_select_se_sh(adev, se, sh, cu); @@ -734,16 +789,20 
> @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> -       if (!x)
> +       if (!x) {
> +               amdgpu_virt_disable_access_debugfs(adev);
>                 return -EINVAL;
> +       }
>
>         while (size && (offset < x * 4)) {
>                 uint32_t value;
>
>                 value = data[offset >> 2];
>                 r = put_user(value, (uint32_t *)buf);
> -               if (r)
> +               if (r) {
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
> +               }
>
>                 result += 4;
>                 buf += 4;
> @@ -751,6 +810,7 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
>                 size -= 4;
>         }
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -805,6 +865,10 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         /* switch to the specific se/sh/cu */
>         mutex_lock(&adev->grbm_idx_mutex);
>         amdgpu_gfx_select_se_sh(adev, se, sh, cu); @@ -840,6 +904,7 @@ 
> static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user 
> *buf,
>
>  err:
>         kfree(data);
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 2b99f5952375..35c381ec0423 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -33,6 +33,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
>         struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
>         struct amdgpu_job *job = to_amdgpu_job(s_job);
>         struct amdgpu_task_info ti;
> +       struct amdgpu_device *adev = ring->adev;
>
>         memset(&ti, 0, sizeof(struct amdgpu_task_info));
>
> @@ -49,10 +50,13 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
>         DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
>                   ti.process_name, ti.tgid, ti.task_name, ti.pid);
>
> -       if (amdgpu_device_should_recover_gpu(ring->adev))
> +       if (amdgpu_device_should_recover_gpu(ring->adev)) {
>                 amdgpu_device_gpu_recover(ring->adev, job);
> -       else
> +       } else {
>                 drm_sched_suspend_timeout(&ring->sched);
> +               if (amdgpu_sriov_vf(adev))
> +                       adev->virt.tdr_debug = true;
> +       }
>  }
>
>  int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index 4d06c79065bf..e8266847675b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -334,3 +334,29 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
>                         adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
>         }
>  }
> +
> +bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev) {
> +       return amdgpu_sriov_is_debug(adev) ? true : false; }
> +
> +int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev) {
> +       int ret = 0;
> +
> +       if (!amdgpu_sriov_vf(adev))
> +               return ret;
> +
> +       if (amdgpu_virt_can_access_debugfs(adev))
> +               adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
> +       else
> +               ret = -EPERM;
> +
> +       return ret;

You can drop the ret variable in this function and just return constants for each case.  E.g.,

> +       if (!amdgpu_sriov_vf(adev))
> +               return 0;
> +
> +       if (amdgpu_virt_can_access_debugfs(adev))
> +               adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
> +       else
> +               return -EPERM;
> +
> +       return 0;

Other than that the patch looks good to me.
Acked-by: Alex Deucher <alexander.deucher@amd.com>

Alex

> +}
> +
> +void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev) {
> +       if (amdgpu_sriov_vf(adev))
> +               adev->virt.caps |= AMDGPU_SRIOV_CAPS_RUNTIME; }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index f6ae3c656304..8f20e6dbd7a9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -265,6 +265,7 @@ struct amdgpu_virt {
>         uint32_t gim_feature;
>         uint32_t reg_access_mode;
>         int req_init_data_ver;
> +       bool tdr_debug;
>  };
>
>  #define amdgpu_sriov_enabled(adev) \
> @@ -296,6 +297,8 @@ static inline bool is_virtual_machine(void)
>
>  #define amdgpu_sriov_is_pp_one_vf(adev) \
>         ((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF)
> +#define amdgpu_sriov_is_debug(adev) \
> +       ((!adev->in_gpu_reset) && adev->virt.tdr_debug)
>
>  bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);  void 
> amdgpu_virt_init_setting(struct amdgpu_device *adev); @@ -314,4 +317,8 
> @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size,
>                                         unsigned int chksum);  void 
> amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);  void 
> amdgpu_detect_virtualization(struct amdgpu_device *adev);
> +
> +bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev); int 
> +amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev); void 
> +amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev);
>  #endif
> --
> 2.17.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flist
> s.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=02%7C01%7CYi
> ntian.Tao%40amd.com%7Cef3f5313116c4fd980f308d7dc99a4c3%7C3dd8961fe4884
> e608e11a82d994e183d%7C0%7C0%7C637220424774813978&amp;sdata=qZz3SK2%2F%
> 2FpN7JSfQNPG8AiyivEIBPncOhxEBM2WB5Rg%3D&amp;reserved=0
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=02%7C01%7Cmonk.liu%40amd.com%7C89a56984ece24933347808d7dc99e4f2%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637220426197692705&amp;sdata=X6tJSycRMvfdMgSh3z1g1qqPlCfl0lQ1ytTR0fnQrWw%3D&amp;reserved=0
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* RE: [PATCH] drm/amdgpu: restrict debugfs register access under SR-IOV
  2020-04-09 15:22   ` Tao, Yintian
@ 2020-04-11  8:58     ` Liu, Monk
  2020-04-13  6:52       ` Tao, Yintian
  0 siblings, 1 reply; 8+ messages in thread
From: Liu, Monk @ 2020-04-11  8:58 UTC (permalink / raw)
  To: Tao, Yintian, Alex Deucher
  Cc: Deucher, Alexander, Koenig, Christian, amd-gfx list

Hi Yintian

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index c0f9a651dc06..4f9780aabf5a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -152,11 +152,17 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
 	if (r < 0)
 		return r;
 
+	if (!amdgpu_virt_can_access_debugfs(adev))
+		return -EINVAL;
+	else
+		amdgpu_virt_enable_access_debugfs(adev);

You patch looks simply bail out if you are not under "debug" condition, but that looks weird: you are forbidding KIQ to do the debugfs access during non-debug condition , which is an overkill to me .

The ideal logic is :
1) When we are under "tdr_deubg" mode, we allow debugfs to handled by MMIO access, just like bare-metal
2) When we are not under "tdr_debug" mode (e.g.: no tdr triggered ) we shall still allow debugfs to work, but all the register access can go with KIQ way .

Looks you are dropping 2 totally  ... 

_____________________________________
Monk Liu|GPU Virtualization Team |AMD


-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Tao, Yintian
Sent: Thursday, April 9, 2020 11:23 PM
To: Alex Deucher <alexdeucher@gmail.com>
Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; amd-gfx list <amd-gfx@lists.freedesktop.org>
Subject: RE: [PATCH] drm/amdgpu: restrict debugfs register access under SR-IOV

Hi  Alex

Many thanks for your review.



-----Original Message-----
From: Alex Deucher <alexdeucher@gmail.com>
Sent: 2020年4月9日 23:21
To: Tao, Yintian <Yintian.Tao@amd.com>
Cc: Koenig, Christian <Christian.Koenig@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; amd-gfx list <amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH] drm/amdgpu: restrict debugfs register access under SR-IOV

On Thu, Apr 9, 2020 at 10:54 AM Yintian Tao <yttao@amd.com> wrote:
>
> Under bare metal, there is no more else to take care of the GPU 
> register access through MMIO.
> Under Virtualization, to access GPU register is implemented through 
> KIQ during run-time due to world-switch.
>
> Therefore, under SR-IOV user can only access debugfs to r/w GPU 
> registers when meets all three conditions below.
> - amdgpu_gpu_recovery=0
> - TDR happened
> - in_gpu_reset=0
>
> v2: merge amdgpu_virt_can_access_debugfs() into
>     amdgpu_virt_enable_access_debugfs()
>
> Signed-off-by: Yintian Tao <yttao@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 73 +++++++++++++++++++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c     |  8 ++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c    | 26 ++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h    |  7 ++
>  4 files changed, 108 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index c0f9a651dc06..1a4894fa3693 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -152,11 +152,16 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         if (use_bank) {
>                 if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
>                     (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return -EINVAL;
>                 }
>                 mutex_lock(&adev->grbm_idx_mutex);
> @@ -207,6 +212,7 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -255,6 +261,10 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         while (size) {
>                 uint32_t value;
>
> @@ -263,6 +273,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
>                 if (r) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
>                 }
>
> @@ -275,6 +286,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -304,6 +316,10 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         while (size) {
>                 uint32_t value;
>
> @@ -311,6 +327,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
>                 if (r) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
>                 }
>
> @@ -325,6 +342,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -354,6 +372,10 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         while (size) {
>                 uint32_t value;
>
> @@ -362,6 +384,7 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
>                 if (r) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
>                 }
>
> @@ -374,6 +397,7 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -403,6 +427,10 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         while (size) {
>                 uint32_t value;
>
> @@ -410,6 +438,7 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
>                 if (r) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
>                 }
>
> @@ -424,6 +453,7 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -453,6 +483,10 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         while (size) {
>                 uint32_t value;
>
> @@ -461,6 +495,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
>                 if (r) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
>                 }
>
> @@ -473,6 +508,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -502,6 +538,10 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         while (size) {
>                 uint32_t value;
>
> @@ -509,6 +549,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
>                 if (r) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
>                 }
>
> @@ -523,6 +564,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -651,16 +693,24 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
>
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> -       if (r)
> +       if (r) {
> +               amdgpu_virt_disable_access_debugfs(adev);
>                 return r;
> +       }
>
> -       if (size > valuesize)
> +       if (size > valuesize) {
> +               amdgpu_virt_disable_access_debugfs(adev);
>                 return -EINVAL;
> +       }
>
>         outsize = 0;
>         x = 0;
> @@ -673,6 +723,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
>                 }
>         }
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return !r ? outsize : r;
>  }
>
> @@ -720,6 +771,10 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         /* switch to the specific se/sh/cu */
>         mutex_lock(&adev->grbm_idx_mutex);
>         amdgpu_gfx_select_se_sh(adev, se, sh, cu); @@ -734,16 +789,20 
> @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> -       if (!x)
> +       if (!x) {
> +               amdgpu_virt_disable_access_debugfs(adev);
>                 return -EINVAL;
> +       }
>
>         while (size && (offset < x * 4)) {
>                 uint32_t value;
>
>                 value = data[offset >> 2];
>                 r = put_user(value, (uint32_t *)buf);
> -               if (r)
> +               if (r) {
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
> +               }
>
>                 result += 4;
>                 buf += 4;
> @@ -751,6 +810,7 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
>                 size -= 4;
>         }
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -805,6 +865,10 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         /* switch to the specific se/sh/cu */
>         mutex_lock(&adev->grbm_idx_mutex);
>         amdgpu_gfx_select_se_sh(adev, se, sh, cu); @@ -840,6 +904,7 @@ 
> static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user 
> *buf,
>
>  err:
>         kfree(data);
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 2b99f5952375..35c381ec0423 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -33,6 +33,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
>         struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
>         struct amdgpu_job *job = to_amdgpu_job(s_job);
>         struct amdgpu_task_info ti;
> +       struct amdgpu_device *adev = ring->adev;
>
>         memset(&ti, 0, sizeof(struct amdgpu_task_info));
>
> @@ -49,10 +50,13 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
>         DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
>                   ti.process_name, ti.tgid, ti.task_name, ti.pid);
>
> -       if (amdgpu_device_should_recover_gpu(ring->adev))
> +       if (amdgpu_device_should_recover_gpu(ring->adev)) {
>                 amdgpu_device_gpu_recover(ring->adev, job);
> -       else
> +       } else {
>                 drm_sched_suspend_timeout(&ring->sched);
> +               if (amdgpu_sriov_vf(adev))
> +                       adev->virt.tdr_debug = true;
> +       }
>  }
>
>  int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index 4d06c79065bf..e8266847675b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -334,3 +334,29 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
>                         adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
>         }
>  }
> +
> +bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev) {
> +       return amdgpu_sriov_is_debug(adev) ? true : false; }
> +
> +int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev) {
> +       int ret = 0;
> +
> +       if (!amdgpu_sriov_vf(adev))
> +               return ret;
> +
> +       if (amdgpu_virt_can_access_debugfs(adev))
> +               adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
> +       else
> +               ret = -EPERM;
> +
> +       return ret;

You can drop the ret variable in this function and just return constants for each case.  E.g.,

> +       if (!amdgpu_sriov_vf(adev))
> +               return 0;
> +
> +       if (amdgpu_virt_can_access_debugfs(adev))
> +               adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
> +       else
> +               return -EPERM;
> +
> +       return 0;

Other than that the patch looks good to me.
Acked-by: Alex Deucher <alexander.deucher@amd.com>

Alex

> +}
> +
> +void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev) {
> +       if (amdgpu_sriov_vf(adev))
> +               adev->virt.caps |= AMDGPU_SRIOV_CAPS_RUNTIME; }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index f6ae3c656304..8f20e6dbd7a9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -265,6 +265,7 @@ struct amdgpu_virt {
>         uint32_t gim_feature;
>         uint32_t reg_access_mode;
>         int req_init_data_ver;
> +       bool tdr_debug;
>  };
>
>  #define amdgpu_sriov_enabled(adev) \
> @@ -296,6 +297,8 @@ static inline bool is_virtual_machine(void)
>
>  #define amdgpu_sriov_is_pp_one_vf(adev) \
>         ((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF)
> +#define amdgpu_sriov_is_debug(adev) \
> +       ((!adev->in_gpu_reset) && adev->virt.tdr_debug)
>
>  bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);  void 
> amdgpu_virt_init_setting(struct amdgpu_device *adev); @@ -314,4 +317,8 
> @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size,
>                                         unsigned int chksum);  void 
> amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);  void 
> amdgpu_detect_virtualization(struct amdgpu_device *adev);
> +
> +bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev); int 
> +amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev); void 
> +amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev);
>  #endif
> --
> 2.17.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flist
> s.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=02%7C01%7CYi
> ntian.Tao%40amd.com%7Cef3f5313116c4fd980f308d7dc99a4c3%7C3dd8961fe4884
> e608e11a82d994e183d%7C0%7C0%7C637220424774813978&amp;sdata=qZz3SK2%2F%
> 2FpN7JSfQNPG8AiyivEIBPncOhxEBM2WB5Rg%3D&amp;reserved=0
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=02%7C01%7Cmonk.liu%40amd.com%7C89a56984ece24933347808d7dc99e4f2%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637220426197692705&amp;sdata=X6tJSycRMvfdMgSh3z1g1qqPlCfl0lQ1ytTR0fnQrWw%3D&amp;reserved=0
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* RE: [PATCH] drm/amdgpu: restrict debugfs register access under SR-IOV
  2020-04-09 15:21 ` Alex Deucher
@ 2020-04-09 15:22   ` Tao, Yintian
  2020-04-11  8:58     ` Liu, Monk
  0 siblings, 1 reply; 8+ messages in thread
From: Tao, Yintian @ 2020-04-09 15:22 UTC (permalink / raw)
  To: Alex Deucher; +Cc: Deucher, Alexander, Koenig, Christian, amd-gfx list

Hi  Alex

Many thanks for your review.



-----Original Message-----
From: Alex Deucher <alexdeucher@gmail.com> 
Sent: 2020年4月9日 23:21
To: Tao, Yintian <Yintian.Tao@amd.com>
Cc: Koenig, Christian <Christian.Koenig@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; amd-gfx list <amd-gfx@lists.freedesktop.org>
Subject: Re: [PATCH] drm/amdgpu: restrict debugfs register access under SR-IOV

On Thu, Apr 9, 2020 at 10:54 AM Yintian Tao <yttao@amd.com> wrote:
>
> Under bare metal, there is no more else to take care of the GPU 
> register access through MMIO.
> Under Virtualization, to access GPU register is implemented through 
> KIQ during run-time due to world-switch.
>
> Therefore, under SR-IOV user can only access debugfs to r/w GPU 
> registers when meets all three conditions below.
> - amdgpu_gpu_recovery=0
> - TDR happened
> - in_gpu_reset=0
>
> v2: merge amdgpu_virt_can_access_debugfs() into
>     amdgpu_virt_enable_access_debugfs()
>
> Signed-off-by: Yintian Tao <yttao@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 73 +++++++++++++++++++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c     |  8 ++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c    | 26 ++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h    |  7 ++
>  4 files changed, 108 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index c0f9a651dc06..1a4894fa3693 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -152,11 +152,16 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         if (use_bank) {
>                 if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
>                     (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return -EINVAL;
>                 }
>                 mutex_lock(&adev->grbm_idx_mutex);
> @@ -207,6 +212,7 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -255,6 +261,10 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         while (size) {
>                 uint32_t value;
>
> @@ -263,6 +273,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
>                 if (r) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
>                 }
>
> @@ -275,6 +286,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -304,6 +316,10 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         while (size) {
>                 uint32_t value;
>
> @@ -311,6 +327,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
>                 if (r) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
>                 }
>
> @@ -325,6 +342,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -354,6 +372,10 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         while (size) {
>                 uint32_t value;
>
> @@ -362,6 +384,7 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
>                 if (r) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
>                 }
>
> @@ -374,6 +397,7 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -403,6 +427,10 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         while (size) {
>                 uint32_t value;
>
> @@ -410,6 +438,7 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
>                 if (r) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
>                 }
>
> @@ -424,6 +453,7 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -453,6 +483,10 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         while (size) {
>                 uint32_t value;
>
> @@ -461,6 +495,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
>                 if (r) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
>                 }
>
> @@ -473,6 +508,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -502,6 +538,10 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         while (size) {
>                 uint32_t value;
>
> @@ -509,6 +549,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
>                 if (r) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
>                 }
>
> @@ -523,6 +564,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -651,16 +693,24 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
>
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> -       if (r)
> +       if (r) {
> +               amdgpu_virt_disable_access_debugfs(adev);
>                 return r;
> +       }
>
> -       if (size > valuesize)
> +       if (size > valuesize) {
> +               amdgpu_virt_disable_access_debugfs(adev);
>                 return -EINVAL;
> +       }
>
>         outsize = 0;
>         x = 0;
> @@ -673,6 +723,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
>                 }
>         }
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return !r ? outsize : r;
>  }
>
> @@ -720,6 +771,10 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         /* switch to the specific se/sh/cu */
>         mutex_lock(&adev->grbm_idx_mutex);
>         amdgpu_gfx_select_se_sh(adev, se, sh, cu); @@ -734,16 +789,20 
> @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> -       if (!x)
> +       if (!x) {
> +               amdgpu_virt_disable_access_debugfs(adev);
>                 return -EINVAL;
> +       }
>
>         while (size && (offset < x * 4)) {
>                 uint32_t value;
>
>                 value = data[offset >> 2];
>                 r = put_user(value, (uint32_t *)buf);
> -               if (r)
> +               if (r) {
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
> +               }
>
>                 result += 4;
>                 buf += 4;
> @@ -751,6 +810,7 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
>                 size -= 4;
>         }
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -805,6 +865,10 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         /* switch to the specific se/sh/cu */
>         mutex_lock(&adev->grbm_idx_mutex);
>         amdgpu_gfx_select_se_sh(adev, se, sh, cu); @@ -840,6 +904,7 @@ 
> static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user 
> *buf,
>
>  err:
>         kfree(data);
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 2b99f5952375..35c381ec0423 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -33,6 +33,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
>         struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
>         struct amdgpu_job *job = to_amdgpu_job(s_job);
>         struct amdgpu_task_info ti;
> +       struct amdgpu_device *adev = ring->adev;
>
>         memset(&ti, 0, sizeof(struct amdgpu_task_info));
>
> @@ -49,10 +50,13 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
>         DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
>                   ti.process_name, ti.tgid, ti.task_name, ti.pid);
>
> -       if (amdgpu_device_should_recover_gpu(ring->adev))
> +       if (amdgpu_device_should_recover_gpu(ring->adev)) {
>                 amdgpu_device_gpu_recover(ring->adev, job);
> -       else
> +       } else {
>                 drm_sched_suspend_timeout(&ring->sched);
> +               if (amdgpu_sriov_vf(adev))
> +                       adev->virt.tdr_debug = true;
> +       }
>  }
>
>  int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index 4d06c79065bf..e8266847675b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -334,3 +334,29 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
>                         adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
>         }
>  }
> +
> +bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev) {
> +       return amdgpu_sriov_is_debug(adev) ? true : false; }
> +
> +int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev) {
> +       int ret = 0;
> +
> +       if (!amdgpu_sriov_vf(adev))
> +               return ret;
> +
> +       if (amdgpu_virt_can_access_debugfs(adev))
> +               adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
> +       else
> +               ret = -EPERM;
> +
> +       return ret;

You can drop the ret variable in this function and just return constants for each case.  E.g.,

> +       if (!amdgpu_sriov_vf(adev))
> +               return 0;
> +
> +       if (amdgpu_virt_can_access_debugfs(adev))
> +               adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
> +       else
> +               return -EPERM;
> +
> +       return 0;

Other than that the patch looks good to me.
Acked-by: Alex Deucher <alexander.deucher@amd.com>

Alex

> +}
> +
> +void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev) {
> +       if (amdgpu_sriov_vf(adev))
> +               adev->virt.caps |= AMDGPU_SRIOV_CAPS_RUNTIME; }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index f6ae3c656304..8f20e6dbd7a9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -265,6 +265,7 @@ struct amdgpu_virt {
>         uint32_t gim_feature;
>         uint32_t reg_access_mode;
>         int req_init_data_ver;
> +       bool tdr_debug;
>  };
>
>  #define amdgpu_sriov_enabled(adev) \
> @@ -296,6 +297,8 @@ static inline bool is_virtual_machine(void)
>
>  #define amdgpu_sriov_is_pp_one_vf(adev) \
>         ((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF)
> +#define amdgpu_sriov_is_debug(adev) \
> +       ((!adev->in_gpu_reset) && adev->virt.tdr_debug)
>
>  bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);  void 
> amdgpu_virt_init_setting(struct amdgpu_device *adev); @@ -314,4 +317,8 
> @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size,
>                                         unsigned int chksum);  void 
> amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);  void 
> amdgpu_detect_virtualization(struct amdgpu_device *adev);
> +
> +bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev); int 
> +amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev); void 
> +amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev);
>  #endif
> --
> 2.17.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flist
> s.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=02%7C01%7CYi
> ntian.Tao%40amd.com%7Cef3f5313116c4fd980f308d7dc99a4c3%7C3dd8961fe4884
> e608e11a82d994e183d%7C0%7C0%7C637220424774813978&amp;sdata=qZz3SK2%2F%
> 2FpN7JSfQNPG8AiyivEIBPncOhxEBM2WB5Rg%3D&amp;reserved=0
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] drm/amdgpu: restrict debugfs register access under SR-IOV
  2020-04-09 14:54 Yintian Tao
@ 2020-04-09 15:21 ` Alex Deucher
  2020-04-09 15:22   ` Tao, Yintian
  0 siblings, 1 reply; 8+ messages in thread
From: Alex Deucher @ 2020-04-09 15:21 UTC (permalink / raw)
  To: Yintian Tao; +Cc: Deucher, Alexander, Christian Koenig, amd-gfx list

On Thu, Apr 9, 2020 at 10:54 AM Yintian Tao <yttao@amd.com> wrote:
>
> Under bare metal, there is no more else to take
> care of the GPU register access through MMIO.
> Under Virtualization, to access GPU register is
> implemented through KIQ during run-time due to
> world-switch.
>
> Therefore, under SR-IOV user can only access
> debugfs to r/w GPU registers when meets all
> three conditions below.
> - amdgpu_gpu_recovery=0
> - TDR happened
> - in_gpu_reset=0
>
> v2: merge amdgpu_virt_can_access_debugfs() into
>     amdgpu_virt_enable_access_debugfs()
>
> Signed-off-by: Yintian Tao <yttao@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 73 +++++++++++++++++++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c     |  8 ++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c    | 26 ++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h    |  7 ++
>  4 files changed, 108 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index c0f9a651dc06..1a4894fa3693 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -152,11 +152,16 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         if (use_bank) {
>                 if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
>                     (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return -EINVAL;
>                 }
>                 mutex_lock(&adev->grbm_idx_mutex);
> @@ -207,6 +212,7 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -255,6 +261,10 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         while (size) {
>                 uint32_t value;
>
> @@ -263,6 +273,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
>                 if (r) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
>                 }
>
> @@ -275,6 +286,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -304,6 +316,10 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         while (size) {
>                 uint32_t value;
>
> @@ -311,6 +327,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
>                 if (r) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
>                 }
>
> @@ -325,6 +342,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -354,6 +372,10 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         while (size) {
>                 uint32_t value;
>
> @@ -362,6 +384,7 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
>                 if (r) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
>                 }
>
> @@ -374,6 +397,7 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -403,6 +427,10 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         while (size) {
>                 uint32_t value;
>
> @@ -410,6 +438,7 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
>                 if (r) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
>                 }
>
> @@ -424,6 +453,7 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -453,6 +483,10 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         while (size) {
>                 uint32_t value;
>
> @@ -461,6 +495,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
>                 if (r) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
>                 }
>
> @@ -473,6 +508,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -502,6 +538,10 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         while (size) {
>                 uint32_t value;
>
> @@ -509,6 +549,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
>                 if (r) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
>                 }
>
> @@ -523,6 +564,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -651,16 +693,24 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
>
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> -       if (r)
> +       if (r) {
> +               amdgpu_virt_disable_access_debugfs(adev);
>                 return r;
> +       }
>
> -       if (size > valuesize)
> +       if (size > valuesize) {
> +               amdgpu_virt_disable_access_debugfs(adev);
>                 return -EINVAL;
> +       }
>
>         outsize = 0;
>         x = 0;
> @@ -673,6 +723,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
>                 }
>         }
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return !r ? outsize : r;
>  }
>
> @@ -720,6 +771,10 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         /* switch to the specific se/sh/cu */
>         mutex_lock(&adev->grbm_idx_mutex);
>         amdgpu_gfx_select_se_sh(adev, se, sh, cu);
> @@ -734,16 +789,20 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> -       if (!x)
> +       if (!x) {
> +               amdgpu_virt_disable_access_debugfs(adev);
>                 return -EINVAL;
> +       }
>
>         while (size && (offset < x * 4)) {
>                 uint32_t value;
>
>                 value = data[offset >> 2];
>                 r = put_user(value, (uint32_t *)buf);
> -               if (r)
> +               if (r) {
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
> +               }
>
>                 result += 4;
>                 buf += 4;
> @@ -751,6 +810,7 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
>                 size -= 4;
>         }
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -805,6 +865,10 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         /* switch to the specific se/sh/cu */
>         mutex_lock(&adev->grbm_idx_mutex);
>         amdgpu_gfx_select_se_sh(adev, se, sh, cu);
> @@ -840,6 +904,7 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
>
>  err:
>         kfree(data);
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 2b99f5952375..35c381ec0423 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -33,6 +33,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
>         struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
>         struct amdgpu_job *job = to_amdgpu_job(s_job);
>         struct amdgpu_task_info ti;
> +       struct amdgpu_device *adev = ring->adev;
>
>         memset(&ti, 0, sizeof(struct amdgpu_task_info));
>
> @@ -49,10 +50,13 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
>         DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
>                   ti.process_name, ti.tgid, ti.task_name, ti.pid);
>
> -       if (amdgpu_device_should_recover_gpu(ring->adev))
> +       if (amdgpu_device_should_recover_gpu(ring->adev)) {
>                 amdgpu_device_gpu_recover(ring->adev, job);
> -       else
> +       } else {
>                 drm_sched_suspend_timeout(&ring->sched);
> +               if (amdgpu_sriov_vf(adev))
> +                       adev->virt.tdr_debug = true;
> +       }
>  }
>
>  int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index 4d06c79065bf..e8266847675b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -334,3 +334,29 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
>                         adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
>         }
>  }
> +
> +bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev)
> +{
> +       return amdgpu_sriov_is_debug(adev) ? true : false;
> +}
> +
> +int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev)
> +{
> +       int ret = 0;
> +
> +       if (!amdgpu_sriov_vf(adev))
> +               return ret;
> +
> +       if (amdgpu_virt_can_access_debugfs(adev))
> +               adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
> +       else
> +               ret = -EPERM;
> +
> +       return ret;

You can drop the ret variable in this function and just return
constants for each case.  E.g.,

> +       if (!amdgpu_sriov_vf(adev))
> +               return 0;
> +
> +       if (amdgpu_virt_can_access_debugfs(adev))
> +               adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
> +       else
> +               return -EPERM;
> +
> +       return 0;

Other than that the patch looks good to me.
Acked-by: Alex Deucher <alexander.deucher@amd.com>

Alex

> +}
> +
> +void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev)
> +{
> +       if (amdgpu_sriov_vf(adev))
> +               adev->virt.caps |= AMDGPU_SRIOV_CAPS_RUNTIME;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index f6ae3c656304..8f20e6dbd7a9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -265,6 +265,7 @@ struct amdgpu_virt {
>         uint32_t gim_feature;
>         uint32_t reg_access_mode;
>         int req_init_data_ver;
> +       bool tdr_debug;
>  };
>
>  #define amdgpu_sriov_enabled(adev) \
> @@ -296,6 +297,8 @@ static inline bool is_virtual_machine(void)
>
>  #define amdgpu_sriov_is_pp_one_vf(adev) \
>         ((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF)
> +#define amdgpu_sriov_is_debug(adev) \
> +       ((!adev->in_gpu_reset) && adev->virt.tdr_debug)
>
>  bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
>  void amdgpu_virt_init_setting(struct amdgpu_device *adev);
> @@ -314,4 +317,8 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size,
>                                         unsigned int chksum);
>  void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
>  void amdgpu_detect_virtualization(struct amdgpu_device *adev);
> +
> +bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
> +int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
> +void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev);
>  #endif
> --
> 2.17.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH] drm/amdgpu: restrict debugfs register access under SR-IOV
@ 2020-04-09 14:54 Yintian Tao
  2020-04-09 15:21 ` Alex Deucher
  0 siblings, 1 reply; 8+ messages in thread
From: Yintian Tao @ 2020-04-09 14:54 UTC (permalink / raw)
  To: christian.koenig, Alexander.Deucher; +Cc: amd-gfx, Yintian Tao

Under bare metal, there is no more else to take
care of the GPU register access through MMIO.
Under Virtualization, to access GPU register is
implemented through KIQ during run-time due to
world-switch.

Therefore, under SR-IOV user can only access
debugfs to r/w GPU registers when meets all
three conditions below.
- amdgpu_gpu_recovery=0
- TDR happened
- in_gpu_reset=0

v2: merge amdgpu_virt_can_access_debugfs() into
    amdgpu_virt_enable_access_debugfs()

Signed-off-by: Yintian Tao <yttao@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 73 +++++++++++++++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c     |  8 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c    | 26 ++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h    |  7 ++
 4 files changed, 108 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index c0f9a651dc06..1a4894fa3693 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -152,11 +152,16 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
 	if (r < 0)
 		return r;
 
+	r = amdgpu_virt_enable_access_debugfs(adev);
+	if (r < 0)
+		return r;
+
 	if (use_bank) {
 		if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
 		    (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) {
 			pm_runtime_mark_last_busy(adev->ddev->dev);
 			pm_runtime_put_autosuspend(adev->ddev->dev);
+			amdgpu_virt_disable_access_debugfs(adev);
 			return -EINVAL;
 		}
 		mutex_lock(&adev->grbm_idx_mutex);
@@ -207,6 +212,7 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
+	amdgpu_virt_disable_access_debugfs(adev);
 	return result;
 }
 
@@ -255,6 +261,10 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
 	if (r < 0)
 		return r;
 
+	r = amdgpu_virt_enable_access_debugfs(adev);
+	if (r < 0)
+		return r;
+
 	while (size) {
 		uint32_t value;
 
@@ -263,6 +273,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
 		if (r) {
 			pm_runtime_mark_last_busy(adev->ddev->dev);
 			pm_runtime_put_autosuspend(adev->ddev->dev);
+			amdgpu_virt_disable_access_debugfs(adev);
 			return r;
 		}
 
@@ -275,6 +286,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
+	amdgpu_virt_disable_access_debugfs(adev);
 	return result;
 }
 
@@ -304,6 +316,10 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
 	if (r < 0)
 		return r;
 
+	r = amdgpu_virt_enable_access_debugfs(adev);
+	if (r < 0)
+		return r;
+
 	while (size) {
 		uint32_t value;
 
@@ -311,6 +327,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
 		if (r) {
 			pm_runtime_mark_last_busy(adev->ddev->dev);
 			pm_runtime_put_autosuspend(adev->ddev->dev);
+			amdgpu_virt_disable_access_debugfs(adev);
 			return r;
 		}
 
@@ -325,6 +342,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
+	amdgpu_virt_disable_access_debugfs(adev);
 	return result;
 }
 
@@ -354,6 +372,10 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
 	if (r < 0)
 		return r;
 
+	r = amdgpu_virt_enable_access_debugfs(adev);
+	if (r < 0)
+		return r;
+
 	while (size) {
 		uint32_t value;
 
@@ -362,6 +384,7 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
 		if (r) {
 			pm_runtime_mark_last_busy(adev->ddev->dev);
 			pm_runtime_put_autosuspend(adev->ddev->dev);
+			amdgpu_virt_disable_access_debugfs(adev);
 			return r;
 		}
 
@@ -374,6 +397,7 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
+	amdgpu_virt_disable_access_debugfs(adev);
 	return result;
 }
 
@@ -403,6 +427,10 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
 	if (r < 0)
 		return r;
 
+	r = amdgpu_virt_enable_access_debugfs(adev);
+	if (r < 0)
+		return r;
+
 	while (size) {
 		uint32_t value;
 
@@ -410,6 +438,7 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
 		if (r) {
 			pm_runtime_mark_last_busy(adev->ddev->dev);
 			pm_runtime_put_autosuspend(adev->ddev->dev);
+			amdgpu_virt_disable_access_debugfs(adev);
 			return r;
 		}
 
@@ -424,6 +453,7 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
+	amdgpu_virt_disable_access_debugfs(adev);
 	return result;
 }
 
@@ -453,6 +483,10 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
 	if (r < 0)
 		return r;
 
+	r = amdgpu_virt_enable_access_debugfs(adev);
+	if (r < 0)
+		return r;
+
 	while (size) {
 		uint32_t value;
 
@@ -461,6 +495,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
 		if (r) {
 			pm_runtime_mark_last_busy(adev->ddev->dev);
 			pm_runtime_put_autosuspend(adev->ddev->dev);
+			amdgpu_virt_disable_access_debugfs(adev);
 			return r;
 		}
 
@@ -473,6 +508,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
+	amdgpu_virt_disable_access_debugfs(adev);
 	return result;
 }
 
@@ -502,6 +538,10 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
 	if (r < 0)
 		return r;
 
+	r = amdgpu_virt_enable_access_debugfs(adev);
+	if (r < 0)
+		return r;
+
 	while (size) {
 		uint32_t value;
 
@@ -509,6 +549,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
 		if (r) {
 			pm_runtime_mark_last_busy(adev->ddev->dev);
 			pm_runtime_put_autosuspend(adev->ddev->dev);
+			amdgpu_virt_disable_access_debugfs(adev);
 			return r;
 		}
 
@@ -523,6 +564,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
+	amdgpu_virt_disable_access_debugfs(adev);
 	return result;
 }
 
@@ -651,16 +693,24 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
 	if (r < 0)
 		return r;
 
+	r = amdgpu_virt_enable_access_debugfs(adev);
+	if (r < 0)
+		return r;
+
 	r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
 
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
-	if (r)
+	if (r) {
+		amdgpu_virt_disable_access_debugfs(adev);
 		return r;
+	}
 
-	if (size > valuesize)
+	if (size > valuesize) {
+		amdgpu_virt_disable_access_debugfs(adev);
 		return -EINVAL;
+	}
 
 	outsize = 0;
 	x = 0;
@@ -673,6 +723,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
 		}
 	}
 
+	amdgpu_virt_disable_access_debugfs(adev);
 	return !r ? outsize : r;
 }
 
@@ -720,6 +771,10 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
 	if (r < 0)
 		return r;
 
+	r = amdgpu_virt_enable_access_debugfs(adev);
+	if (r < 0)
+		return r;
+
 	/* switch to the specific se/sh/cu */
 	mutex_lock(&adev->grbm_idx_mutex);
 	amdgpu_gfx_select_se_sh(adev, se, sh, cu);
@@ -734,16 +789,20 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
 	pm_runtime_mark_last_busy(adev->ddev->dev);
 	pm_runtime_put_autosuspend(adev->ddev->dev);
 
-	if (!x)
+	if (!x) {
+		amdgpu_virt_disable_access_debugfs(adev);
 		return -EINVAL;
+	}
 
 	while (size && (offset < x * 4)) {
 		uint32_t value;
 
 		value = data[offset >> 2];
 		r = put_user(value, (uint32_t *)buf);
-		if (r)
+		if (r) {
+			amdgpu_virt_disable_access_debugfs(adev);
 			return r;
+		}
 
 		result += 4;
 		buf += 4;
@@ -751,6 +810,7 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
 		size -= 4;
 	}
 
+	amdgpu_virt_disable_access_debugfs(adev);
 	return result;
 }
 
@@ -805,6 +865,10 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
 	if (r < 0)
 		return r;
 
+	r = amdgpu_virt_enable_access_debugfs(adev);
+	if (r < 0)
+		return r;
+
 	/* switch to the specific se/sh/cu */
 	mutex_lock(&adev->grbm_idx_mutex);
 	amdgpu_gfx_select_se_sh(adev, se, sh, cu);
@@ -840,6 +904,7 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
 
 err:
 	kfree(data);
+	amdgpu_virt_disable_access_debugfs(adev);
 	return result;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 2b99f5952375..35c381ec0423 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -33,6 +33,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
 	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
 	struct amdgpu_job *job = to_amdgpu_job(s_job);
 	struct amdgpu_task_info ti;
+	struct amdgpu_device *adev = ring->adev;
 
 	memset(&ti, 0, sizeof(struct amdgpu_task_info));
 
@@ -49,10 +50,13 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
 	DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
 		  ti.process_name, ti.tgid, ti.task_name, ti.pid);
 
-	if (amdgpu_device_should_recover_gpu(ring->adev))
+	if (amdgpu_device_should_recover_gpu(ring->adev)) {
 		amdgpu_device_gpu_recover(ring->adev, job);
-	else
+	} else {
 		drm_sched_suspend_timeout(&ring->sched);
+		if (amdgpu_sriov_vf(adev))
+			adev->virt.tdr_debug = true;
+	}
 }
 
 int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 4d06c79065bf..e8266847675b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -334,3 +334,29 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
 			adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
 	}
 }
+
+bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev)
+{
+	return amdgpu_sriov_is_debug(adev) ? true : false;
+}
+
+int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev)
+{
+	int ret = 0;
+
+	if (!amdgpu_sriov_vf(adev))
+		return ret;
+
+	if (amdgpu_virt_can_access_debugfs(adev))
+		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
+	else
+		ret = -EPERM;
+
+	return ret;
+}
+
+void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev)
+{
+	if (amdgpu_sriov_vf(adev))
+		adev->virt.caps |= AMDGPU_SRIOV_CAPS_RUNTIME;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index f6ae3c656304..8f20e6dbd7a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -265,6 +265,7 @@ struct amdgpu_virt {
 	uint32_t gim_feature;
 	uint32_t reg_access_mode;
 	int req_init_data_ver;
+	bool tdr_debug;
 };
 
 #define amdgpu_sriov_enabled(adev) \
@@ -296,6 +297,8 @@ static inline bool is_virtual_machine(void)
 
 #define amdgpu_sriov_is_pp_one_vf(adev) \
 	((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF)
+#define amdgpu_sriov_is_debug(adev) \
+	((!adev->in_gpu_reset) && adev->virt.tdr_debug)
 
 bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
 void amdgpu_virt_init_setting(struct amdgpu_device *adev);
@@ -314,4 +317,8 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size,
 					unsigned int chksum);
 void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
 void amdgpu_detect_virtualization(struct amdgpu_device *adev);
+
+bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
+int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
+void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev);
 #endif
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2020-04-13  6:52 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-09  6:01 [PATCH] drm/amdgpu: restrict debugfs register access under SR-IOV Yintian Tao
2020-04-09 12:41 ` Christian König
2020-04-09 13:42   ` Tao, Yintian
2020-04-09 14:54 Yintian Tao
2020-04-09 15:21 ` Alex Deucher
2020-04-09 15:22   ` Tao, Yintian
2020-04-11  8:58     ` Liu, Monk
2020-04-13  6:52       ` Tao, Yintian

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.