AMD-GFX Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH 1/2] drm/amdgpu: return IH ring drain finished if ring is empty
@ 2021-04-18 17:35 Philip Yang
  2021-04-18 17:35 ` [PATCH 2/2] drm/amdkfd: handle stale retry fault Philip Yang
  0 siblings, 1 reply; 3+ messages in thread
From: Philip Yang @ 2021-04-18 17:35 UTC (permalink / raw)
  To: amd-gfx; +Cc: Philip Yang

Sometimes IH do not setup ring wptr overflow flag after wptr exceed
rptr. As a workaround, if IH rptr equals to wptr, ring is empty,
return true to indicate IH ring checkpoint is processed, IH ring drain
is finished.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index faaa6aa2faaf..a36e191cf086 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -175,7 +175,9 @@ static bool amdgpu_ih_has_checkpoint_processed(struct amdgpu_device *adev,
 		cur_rptr += ih->ptr_mask + 1;
 	*prev_rptr = cur_rptr;
 
-	return cur_rptr >= checkpoint_wptr;
+	/* check ring is empty to workaround missing wptr overflow flag */
+	return cur_rptr >= checkpoint_wptr ||
+	       (cur_rptr & ih->ptr_mask) == amdgpu_ih_get_wptr(adev, ih);
 }
 
 /**
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 2/2] drm/amdkfd: handle stale retry fault
  2021-04-18 17:35 [PATCH 1/2] drm/amdgpu: return IH ring drain finished if ring is empty Philip Yang
@ 2021-04-18 17:35 ` Philip Yang
  2021-04-19 18:52   ` Felix Kuehling
  0 siblings, 1 reply; 3+ messages in thread
From: Philip Yang @ 2021-04-18 17:35 UTC (permalink / raw)
  To: amd-gfx; +Cc: Philip Yang

Retry fault interrupt maybe pending in IH ring after GPU page table is
updated to recover the vm fault, because each page of the range generate
retry fault interrupt. There is race if application unmap range to
remove and free the range first and then retry fault work restore_pages
handle the retry fault interrupt, because range can not be found, this
vm fault can not be recovered and report incorrect GPU vm fault to
application.

Before unmap to remove and free range, drain retry fault interrupt from
IH ring1 to ensure no retry fault comes after the range is removed.

Drain retry fault interrupt skip the range which is on deferred list to
remove, or the range is child range, which is split by unmap, does not
add to svms and have interval notifier.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 75 +++++++++++++++++++++++++++-
 1 file changed, 74 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 0e0b4ffd20ab..45dd055118eb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1402,11 +1402,13 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 	svm_range_lock(prange);
 	if (!prange->actual_loc) {
 		if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
+			pr_debug("hmm update the range, need validate again\n");
 			r = -EAGAIN;
 			goto unlock_out;
 		}
 	}
 	if (!list_empty(&prange->child_list)) {
+		pr_debug("range split by unmap in parallel, validate again\n");
 		r = -EAGAIN;
 		goto unlock_out;
 	}
@@ -1828,6 +1830,28 @@ svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)
 	}
 }
 
+static void svm_range_drain_retry_fault(struct svm_range_list *svms)
+{
+	struct kfd_process_device *pdd;
+	struct amdgpu_device *adev;
+	struct kfd_process *p;
+	uint32_t i;
+
+	p = container_of(svms, struct kfd_process, svms);
+
+	for (i = 0; i < p->n_pdds; i++) {
+		pdd = p->pdds[i];
+		if (!pdd)
+			continue;
+
+		pr_debug("drain retry fault gpu %d svms %p\n", i, svms);
+		adev = (struct amdgpu_device *)pdd->dev->kgd;
+
+		amdgpu_ih_wait_on_checkpoint_process(adev, &adev->irq.ih1);
+		pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
+	}
+}
+
 static void svm_range_deferred_list_work(struct work_struct *work)
 {
 	struct svm_range_list *svms;
@@ -1845,6 +1869,10 @@ static void svm_range_deferred_list_work(struct work_struct *work)
 		pr_debug("prange 0x%p [0x%lx 0x%lx] op %d\n", prange,
 			 prange->start, prange->last, prange->work_item.op);
 
+		/* Make sure no stale retry fault coming after range is freed */
+		if (prange->work_item.op == SVM_OP_UNMAP_RANGE)
+			svm_range_drain_retry_fault(prange->svms);
+
 		mm = prange->work_item.mm;
 		mmap_write_lock(mm);
 		mutex_lock(&svms->lock);
@@ -2152,6 +2180,44 @@ svm_range_best_restore_location(struct svm_range *prange,
 	return -1;
 }
 
+/* svm_range_skip_recover - decide if prange can be recovered
+ * @prange: svm range structure
+ *
+ * GPU vm retry fault handle skip recover the range for cases:
+ * 1. prange is on deferred list to be removed after unmap, it is stale fault,
+ *    deferred list work will drain the stale fault before free the prange.
+ * 2. prange is on deferred list to add interval notifier after split, or
+ * 3. prange is child range, it is split from parent prange, recover later
+ *    after interval notifier is added.
+ *
+ * Return: true to skip recover, false to recover
+ */
+static bool svm_range_skip_recover(struct svm_range *prange)
+{
+	struct svm_range_list *svms = prange->svms;
+
+	spin_lock(&svms->deferred_list_lock);
+	if (list_empty(&prange->deferred_list) &&
+	    list_empty(&prange->child_list)) {
+		spin_unlock(&svms->deferred_list_lock);
+		return false;
+	}
+	spin_unlock(&svms->deferred_list_lock);
+
+	if (prange->work_item.op == SVM_OP_UNMAP_RANGE) {
+		pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] unmapped\n",
+			 svms, prange, prange->start, prange->last);
+		return true;
+	}
+	if (prange->work_item.op == SVM_OP_ADD_RANGE_AND_MAP ||
+	    prange->work_item.op == SVM_OP_ADD_RANGE) {
+		pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] not added yet\n",
+			 svms, prange, prange->start, prange->last);
+		return true;
+	}
+	return false;
+}
+
 int
 svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 			uint64_t addr)
@@ -2187,7 +2253,6 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 	mmap_read_lock(mm);
 	mutex_lock(&svms->lock);
 	prange = svm_range_from_addr(svms, addr, NULL);
-
 	if (!prange) {
 		pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
 			 svms, addr);
@@ -2196,6 +2261,10 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 	}
 
 	mutex_lock(&prange->migrate_mutex);
+
+	if (svm_range_skip_recover(prange))
+		goto out_unlock_range;
+
 	timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp;
 	/* skip duplicate vm fault on different pages of same range */
 	if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) {
@@ -2254,6 +2323,10 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 out:
 	kfd_unref_process(p);
 
+	if (r == -EAGAIN) {
+		pr_debug("recover vm fault later\n");
+		r = 0;
+	}
 	return r;
 }
 
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH 2/2] drm/amdkfd: handle stale retry fault
  2021-04-18 17:35 ` [PATCH 2/2] drm/amdkfd: handle stale retry fault Philip Yang
@ 2021-04-19 18:52   ` Felix Kuehling
  0 siblings, 0 replies; 3+ messages in thread
From: Felix Kuehling @ 2021-04-19 18:52 UTC (permalink / raw)
  To: Philip Yang, amd-gfx


Am 2021-04-18 um 1:35 p.m. schrieb Philip Yang:
> Retry fault interrupt maybe pending in IH ring after GPU page table is
> updated to recover the vm fault, because each page of the range generate
> retry fault interrupt. There is race if application unmap range to
> remove and free the range first and then retry fault work restore_pages
> handle the retry fault interrupt, because range can not be found, this
> vm fault can not be recovered and report incorrect GPU vm fault to
> application.
>
> Before unmap to remove and free range, drain retry fault interrupt from
> IH ring1 to ensure no retry fault comes after the range is removed.
>
> Drain retry fault interrupt skip the range which is on deferred list to
> remove, or the range is child range, which is split by unmap, does not
> add to svms and have interval notifier.
>
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
The series looks good to me. But the skip-recover changes and the
-EAGAIN handling in svm_range_restore_pages should be a separate patch.

Also, when we defer processing an interrupt (skip-recover or r ==
-EAGAIN) and wait for a subsequent retry interrupt, we may want to
remove that fault address from the gmc->fault_ring that's used by
amdgpu_gmc_filter_faults to filter out repeated page faults on the same
address. In the future we would also have to remove those addresses from
the IH CAM.

Regards,
  Felix



> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 75 +++++++++++++++++++++++++++-
>  1 file changed, 74 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 0e0b4ffd20ab..45dd055118eb 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -1402,11 +1402,13 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
>  	svm_range_lock(prange);
>  	if (!prange->actual_loc) {
>  		if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
> +			pr_debug("hmm update the range, need validate again\n");
>  			r = -EAGAIN;
>  			goto unlock_out;
>  		}
>  	}
>  	if (!list_empty(&prange->child_list)) {
> +		pr_debug("range split by unmap in parallel, validate again\n");
>  		r = -EAGAIN;
>  		goto unlock_out;
>  	}
> @@ -1828,6 +1830,28 @@ svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)
>  	}
>  }
>  
> +static void svm_range_drain_retry_fault(struct svm_range_list *svms)
> +{
> +	struct kfd_process_device *pdd;
> +	struct amdgpu_device *adev;
> +	struct kfd_process *p;
> +	uint32_t i;
> +
> +	p = container_of(svms, struct kfd_process, svms);
> +
> +	for (i = 0; i < p->n_pdds; i++) {
> +		pdd = p->pdds[i];
> +		if (!pdd)
> +			continue;
> +
> +		pr_debug("drain retry fault gpu %d svms %p\n", i, svms);
> +		adev = (struct amdgpu_device *)pdd->dev->kgd;
> +
> +		amdgpu_ih_wait_on_checkpoint_process(adev, &adev->irq.ih1);
> +		pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
> +	}
> +}
> +
>  static void svm_range_deferred_list_work(struct work_struct *work)
>  {
>  	struct svm_range_list *svms;
> @@ -1845,6 +1869,10 @@ static void svm_range_deferred_list_work(struct work_struct *work)
>  		pr_debug("prange 0x%p [0x%lx 0x%lx] op %d\n", prange,
>  			 prange->start, prange->last, prange->work_item.op);
>  
> +		/* Make sure no stale retry fault coming after range is freed */
> +		if (prange->work_item.op == SVM_OP_UNMAP_RANGE)
> +			svm_range_drain_retry_fault(prange->svms);
> +
>  		mm = prange->work_item.mm;
>  		mmap_write_lock(mm);
>  		mutex_lock(&svms->lock);
> @@ -2152,6 +2180,44 @@ svm_range_best_restore_location(struct svm_range *prange,
>  	return -1;
>  }
>  
> +/* svm_range_skip_recover - decide if prange can be recovered
> + * @prange: svm range structure
> + *
> + * GPU vm retry fault handle skip recover the range for cases:
> + * 1. prange is on deferred list to be removed after unmap, it is stale fault,
> + *    deferred list work will drain the stale fault before free the prange.
> + * 2. prange is on deferred list to add interval notifier after split, or
> + * 3. prange is child range, it is split from parent prange, recover later
> + *    after interval notifier is added.
> + *
> + * Return: true to skip recover, false to recover
> + */
> +static bool svm_range_skip_recover(struct svm_range *prange)
> +{
> +	struct svm_range_list *svms = prange->svms;
> +
> +	spin_lock(&svms->deferred_list_lock);
> +	if (list_empty(&prange->deferred_list) &&
> +	    list_empty(&prange->child_list)) {
> +		spin_unlock(&svms->deferred_list_lock);
> +		return false;
> +	}
> +	spin_unlock(&svms->deferred_list_lock);
> +
> +	if (prange->work_item.op == SVM_OP_UNMAP_RANGE) {
> +		pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] unmapped\n",
> +			 svms, prange, prange->start, prange->last);
> +		return true;
> +	}
> +	if (prange->work_item.op == SVM_OP_ADD_RANGE_AND_MAP ||
> +	    prange->work_item.op == SVM_OP_ADD_RANGE) {
> +		pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] not added yet\n",
> +			 svms, prange, prange->start, prange->last);
> +		return true;
> +	}
> +	return false;
> +}
> +
>  int
>  svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>  			uint64_t addr)
> @@ -2187,7 +2253,6 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>  	mmap_read_lock(mm);
>  	mutex_lock(&svms->lock);
>  	prange = svm_range_from_addr(svms, addr, NULL);
> -
>  	if (!prange) {
>  		pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
>  			 svms, addr);
> @@ -2196,6 +2261,10 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>  	}
>  
>  	mutex_lock(&prange->migrate_mutex);
> +
> +	if (svm_range_skip_recover(prange))
> +		goto out_unlock_range;
> +
>  	timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp;
>  	/* skip duplicate vm fault on different pages of same range */
>  	if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) {
> @@ -2254,6 +2323,10 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>  out:
>  	kfd_unref_process(p);
>  
> +	if (r == -EAGAIN) {
> +		pr_debug("recover vm fault later\n");
> +		r = 0;
> +	}
>  	return r;
>  }
>  
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, back to index

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-18 17:35 [PATCH 1/2] drm/amdgpu: return IH ring drain finished if ring is empty Philip Yang
2021-04-18 17:35 ` [PATCH 2/2] drm/amdkfd: handle stale retry fault Philip Yang
2021-04-19 18:52   ` Felix Kuehling

AMD-GFX Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/amd-gfx/0 amd-gfx/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 amd-gfx amd-gfx/ https://lore.kernel.org/amd-gfx \
		amd-gfx@lists.freedesktop.org
	public-inbox-index amd-gfx

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.freedesktop.lists.amd-gfx


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git