All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu: add ih waiter on process until checkpoint
@ 2021-03-09 16:19 Jonathan Kim
  2021-03-09 17:40 ` Christian König
  2021-03-10 22:29 ` Felix Kuehling
  0 siblings, 2 replies; 9+ messages in thread
From: Jonathan Kim @ 2021-03-09 16:19 UTC (permalink / raw)
  To: amd-gfx; +Cc: Philip.Yang, Felix.Kuehling, Jonathan Kim, Christian.Koenig

Add IH function to allow caller to wait until ring entries are processed
until the checkpoint write pointer.

This will be primarily used by HMM to drain pending page fault interrupts
before memory unmap to prevent HMM from handling stale interrupts.

v3: Scrap busy loop and change to wait_event.

v2: Update title and description to clarify use.
Add rptr/wptr wrap counter checks to guarantee ring entries are processed
until the checkpoint.

Suggested-by: Christian Koenig <christian.koenig@amd.com>
Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 49 ++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h |  5 +++
 2 files changed, 54 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index dc852af4f3b7..1024065f1f03 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -99,6 +99,8 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
 		ih->rptr_addr = adev->wb.gpu_addr + rptr_offs * 4;
 		ih->rptr_cpu = &adev->wb.wb[rptr_offs];
 	}
+
+	init_waitqueue_head(&ih->wait_process);
 	return 0;
 }
 
@@ -160,6 +162,52 @@ void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
 	}
 }
 
+/* Waiter helper that checks current rptr matches or passes checkpoint wptr */
+static bool amdgpu_ih_has_checkpoint_processed(struct amdgpu_device *adev,
+					struct amdgpu_ih_ring *ih,
+					uint32_t checkpoint_wptr,
+					uint32_t *prev_rptr)
+{
+	uint32_t cur_rptr = ih->rptr | (*prev_rptr & ~ih->ptr_mask);
+
+	/* rptr has wrapped. */
+	if (cur_rptr < *prev_rptr)
+		cur_rptr += ih->ptr_mask + 1;
+	*prev_rptr = cur_rptr;
+
+	return cur_rptr >= checkpoint_wptr;
+}
+
+/**
+ * amdgpu_ih_wait_on_checkpoint_process - wait to process IVs up to checkpoint
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: ih ring to process
+ *
+ * Used to ensure ring has processed IVs up to the checkpoint write pointer.
+ */
+int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device *adev,
+					struct amdgpu_ih_ring *ih)
+{
+	uint32_t checkpoint_wptr, rptr;
+
+	if (!ih->enabled || adev->shutdown)
+		return -ENODEV;
+
+	checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
+	/* Order wptr with rptr. */
+	rmb();
+	rptr = READ_ONCE(ih->rptr);
+
+	/* wptr has wrapped. */
+	if (rptr > checkpoint_wptr)
+		checkpoint_wptr += ih->ptr_mask + 1;
+
+	return wait_event_interruptible(ih->wait_process,
+				amdgpu_ih_has_checkpoint_processed(adev, ih,
+						checkpoint_wptr, &rptr));
+}
+
 /**
  * amdgpu_ih_process - interrupt handler
  *
@@ -195,6 +243,7 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
 	}
 
 	amdgpu_ih_set_rptr(adev, ih);
+	wake_up_all(&ih->wait_process);
 	atomic_set(&ih->lock, 0);
 
 	/* make sure wptr hasn't changed while processing */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
index 6ed4a85fc7c3..87ec6d20dbe0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
@@ -66,6 +66,9 @@ struct amdgpu_ih_ring {
 	unsigned		rptr;
 	atomic_t		lock;
 	struct amdgpu_ih_regs	ih_regs;
+
+	/* For waiting on IH processing at checkpoint. */
+	wait_queue_head_t wait_process;
 };
 
 /* provided by the ih block */
@@ -87,6 +90,8 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
 void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
 void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
 			  unsigned int num_dw);
+int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device *adev,
+					struct amdgpu_ih_ring *ih);
 int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
 void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
 				struct amdgpu_ih_ring *ih,
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: add ih waiter on process until checkpoint
  2021-03-09 16:19 [PATCH] drm/amdgpu: add ih waiter on process until checkpoint Jonathan Kim
@ 2021-03-09 17:40 ` Christian König
  2021-03-10 22:29 ` Felix Kuehling
  1 sibling, 0 replies; 9+ messages in thread
From: Christian König @ 2021-03-09 17:40 UTC (permalink / raw)
  To: Jonathan Kim, amd-gfx; +Cc: Philip.Yang, Felix.Kuehling

Am 09.03.21 um 17:19 schrieb Jonathan Kim:
> Add IH function to allow caller to wait until ring entries are processed
> until the checkpoint write pointer.
>
> This will be primarily used by HMM to drain pending page fault interrupts
> before memory unmap to prevent HMM from handling stale interrupts.
>
> v3: Scrap busy loop and change to wait_event.
>
> v2: Update title and description to clarify use.
> Add rptr/wptr wrap counter checks to guarantee ring entries are processed
> until the checkpoint.
>
> Suggested-by: Christian Koenig <christian.koenig@amd.com>
> Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 49 ++++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h |  5 +++
>   2 files changed, 54 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> index dc852af4f3b7..1024065f1f03 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> @@ -99,6 +99,8 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
>   		ih->rptr_addr = adev->wb.gpu_addr + rptr_offs * 4;
>   		ih->rptr_cpu = &adev->wb.wb[rptr_offs];
>   	}
> +
> +	init_waitqueue_head(&ih->wait_process);
>   	return 0;
>   }
>   
> @@ -160,6 +162,52 @@ void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
>   	}
>   }
>   
> +/* Waiter helper that checks current rptr matches or passes checkpoint wptr */
> +static bool amdgpu_ih_has_checkpoint_processed(struct amdgpu_device *adev,
> +					struct amdgpu_ih_ring *ih,
> +					uint32_t checkpoint_wptr,
> +					uint32_t *prev_rptr)
> +{
> +	uint32_t cur_rptr = ih->rptr | (*prev_rptr & ~ih->ptr_mask);
> +
> +	/* rptr has wrapped. */
> +	if (cur_rptr < *prev_rptr)
> +		cur_rptr += ih->ptr_mask + 1;
> +	*prev_rptr = cur_rptr;
> +
> +	return cur_rptr >= checkpoint_wptr;
> +}
> +
> +/**
> + * amdgpu_ih_wait_on_checkpoint_process - wait to process IVs up to checkpoint
> + *
> + * @adev: amdgpu_device pointer
> + * @ih: ih ring to process
> + *
> + * Used to ensure ring has processed IVs up to the checkpoint write pointer.
> + */
> +int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device *adev,
> +					struct amdgpu_ih_ring *ih)
> +{
> +	uint32_t checkpoint_wptr, rptr;
> +
> +	if (!ih->enabled || adev->shutdown)
> +		return -ENODEV;
> +
> +	checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
> +	/* Order wptr with rptr. */
> +	rmb();
> +	rptr = READ_ONCE(ih->rptr);
> +
> +	/* wptr has wrapped. */
> +	if (rptr > checkpoint_wptr)
> +		checkpoint_wptr += ih->ptr_mask + 1;
> +
> +	return wait_event_interruptible(ih->wait_process,
> +				amdgpu_ih_has_checkpoint_processed(adev, ih,
> +						checkpoint_wptr, &rptr));
> +}
> +
>   /**
>    * amdgpu_ih_process - interrupt handler
>    *
> @@ -195,6 +243,7 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
>   	}
>   
>   	amdgpu_ih_set_rptr(adev, ih);
> +	wake_up_all(&ih->wait_process);
>   	atomic_set(&ih->lock, 0);
>   
>   	/* make sure wptr hasn't changed while processing */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> index 6ed4a85fc7c3..87ec6d20dbe0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> @@ -66,6 +66,9 @@ struct amdgpu_ih_ring {
>   	unsigned		rptr;
>   	atomic_t		lock;
>   	struct amdgpu_ih_regs	ih_regs;
> +
> +	/* For waiting on IH processing at checkpoint. */
> +	wait_queue_head_t wait_process;
>   };
>   
>   /* provided by the ih block */
> @@ -87,6 +90,8 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
>   void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
>   void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
>   			  unsigned int num_dw);
> +int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device *adev,
> +					struct amdgpu_ih_ring *ih);
>   int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
>   void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
>   				struct amdgpu_ih_ring *ih,

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: add ih waiter on process until checkpoint
  2021-03-09 16:19 [PATCH] drm/amdgpu: add ih waiter on process until checkpoint Jonathan Kim
  2021-03-09 17:40 ` Christian König
@ 2021-03-10 22:29 ` Felix Kuehling
  1 sibling, 0 replies; 9+ messages in thread
From: Felix Kuehling @ 2021-03-10 22:29 UTC (permalink / raw)
  To: Jonathan Kim, amd-gfx; +Cc: Philip.Yang, Christian.Koenig

On 2021-03-09 11:19 a.m., Jonathan Kim wrote:
> Add IH function to allow caller to wait until ring entries are processed
> until the checkpoint write pointer.
>
> This will be primarily used by HMM to drain pending page fault interrupts
> before memory unmap to prevent HMM from handling stale interrupts.
>
> v3: Scrap busy loop and change to wait_event.
>
> v2: Update title and description to clarify use.
> Add rptr/wptr wrap counter checks to guarantee ring entries are processed
> until the checkpoint.
>
> Suggested-by: Christian Koenig <christian.koenig@amd.com>
> Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>


> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 49 ++++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h |  5 +++
>   2 files changed, 54 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> index dc852af4f3b7..1024065f1f03 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> @@ -99,6 +99,8 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
>   		ih->rptr_addr = adev->wb.gpu_addr + rptr_offs * 4;
>   		ih->rptr_cpu = &adev->wb.wb[rptr_offs];
>   	}
> +
> +	init_waitqueue_head(&ih->wait_process);
>   	return 0;
>   }
>   
> @@ -160,6 +162,52 @@ void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
>   	}
>   }
>   
> +/* Waiter helper that checks current rptr matches or passes checkpoint wptr */
> +static bool amdgpu_ih_has_checkpoint_processed(struct amdgpu_device *adev,
> +					struct amdgpu_ih_ring *ih,
> +					uint32_t checkpoint_wptr,
> +					uint32_t *prev_rptr)
> +{
> +	uint32_t cur_rptr = ih->rptr | (*prev_rptr & ~ih->ptr_mask);
> +
> +	/* rptr has wrapped. */
> +	if (cur_rptr < *prev_rptr)
> +		cur_rptr += ih->ptr_mask + 1;
> +	*prev_rptr = cur_rptr;
> +
> +	return cur_rptr >= checkpoint_wptr;
> +}
> +
> +/**
> + * amdgpu_ih_wait_on_checkpoint_process - wait to process IVs up to checkpoint
> + *
> + * @adev: amdgpu_device pointer
> + * @ih: ih ring to process
> + *
> + * Used to ensure ring has processed IVs up to the checkpoint write pointer.
> + */
> +int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device *adev,
> +					struct amdgpu_ih_ring *ih)
> +{
> +	uint32_t checkpoint_wptr, rptr;
> +
> +	if (!ih->enabled || adev->shutdown)
> +		return -ENODEV;
> +
> +	checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
> +	/* Order wptr with rptr. */
> +	rmb();
> +	rptr = READ_ONCE(ih->rptr);
> +
> +	/* wptr has wrapped. */
> +	if (rptr > checkpoint_wptr)
> +		checkpoint_wptr += ih->ptr_mask + 1;
> +
> +	return wait_event_interruptible(ih->wait_process,
> +				amdgpu_ih_has_checkpoint_processed(adev, ih,
> +						checkpoint_wptr, &rptr));
> +}
> +
>   /**
>    * amdgpu_ih_process - interrupt handler
>    *
> @@ -195,6 +243,7 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
>   	}
>   
>   	amdgpu_ih_set_rptr(adev, ih);
> +	wake_up_all(&ih->wait_process);
>   	atomic_set(&ih->lock, 0);
>   
>   	/* make sure wptr hasn't changed while processing */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> index 6ed4a85fc7c3..87ec6d20dbe0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> @@ -66,6 +66,9 @@ struct amdgpu_ih_ring {
>   	unsigned		rptr;
>   	atomic_t		lock;
>   	struct amdgpu_ih_regs	ih_regs;
> +
> +	/* For waiting on IH processing at checkpoint. */
> +	wait_queue_head_t wait_process;
>   };
>   
>   /* provided by the ih block */
> @@ -87,6 +90,8 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
>   void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
>   void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
>   			  unsigned int num_dw);
> +int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device *adev,
> +					struct amdgpu_ih_ring *ih);
>   int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
>   void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
>   				struct amdgpu_ih_ring *ih,
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: add ih waiter on process until checkpoint
  2021-03-08 21:09       ` Kim, Jonathan
@ 2021-03-09  9:09         ` Christian König
  0 siblings, 0 replies; 9+ messages in thread
From: Christian König @ 2021-03-09  9:09 UTC (permalink / raw)
  To: Kim, Jonathan, Koenig, Christian, amd-gfx; +Cc: Yang, Philip, Kuehling, Felix

Am 08.03.21 um 22:09 schrieb Kim, Jonathan:
> [SNIP]
>> First of all rptr/wptr are not 32bit, their maximum is 20 or 19 bits IIRC (and
>> they are dw, so 4M or 2M bytes).
>>
> Thanks Christian.  This makes sense now.  I can see how rptrs advance by dword sets in the iv decode helper.
> My apologies, but I'm still a bit confused on the pseudo code below and have a few questions before I give this another go ...
>
>> Then the purpose of the wait_event() is to wait for changes of the rptr, so
>> the matching wake_up() should be at the same place as calling
>> amdgpu_ih_set_rptr().
>>
>> My original idea of the wrap around counter assumes that the counter is
>> updated in amdgpu_ih_process(). That isn't strictly necessary, but it could be
>> better even if it adds some overhead to IH processing.
>>
>> If you want to implement it like below it should look something like this:
>>
>> uint32_t rptr, wptr, tmp;
>>
>> wptr = amdgpu_ih_get_wptr(adev, ih);
>> rmb();
>> rptr = READ_ONCE(ih->rptr);
>>
>> if (rptr > wptr)
>>       rptr += ih->ptr_mask + 1;
> So I think you're trying to be safe by assuming the rptr still needs to overtake the wptr and hasn't done so yet so this is looking ahead by saying the rptr will have to wrap?

I really need to wake up before writing stuff like that. No, the problem 
why you don't understand is that I mixed up rptr and wptr :)

>> wait_event(ig->processing, {
>>       tmp = amdgpu_ih_get_wptr(adev, ih);
>>       tmp |= wptr & ~ih->ptr_mask;
>>       if (tmp < wptr)
>>           tmp += ih->ptr_mask + 1;
>>       wptr = tmp;
> For short term use, I think the caller will try to guarantee that the checkpoint wptr is checked during a time when interrupts won't be generated anymore, but I guess for general use, we can't guarantee this and this is why we're using a moving wptr checkpoint?
>
>>       wptr >= rptr;
> Assuming you mean 'return wptr >= rptr' will unblock if true, I can see this for wptr == rptr, but why wptr > rptr?  What happens if the caller waits when nothing is wrapped but there are entries to be processed? (I'm assuming rptr < wptr can be true with entries that still require processing).  Won't the caller advance without waiting?

No as I wrote above I just completely mixed up things here. Let me try 
once more:

wptr = amdgpu_ih_get_wptr(adev, ih);
rmb();
rptr = READ_ONCE(ih->rptr);

if (rptr > wptr)
      wptr += ih->ptr_mask + 1;

wait_event(ig->processing, {
     tmp = ih->rptr;
     tmp |= rptr & ~ih->ptr_mask;
     if (tmp < rptr)
          tmp += ih->ptr_mask + 1;
     rptr = tmp;
     rptr >= wptr;
});

What I also wanted to note here is that using wait_event() makes things 
much easier since it provides the necessary memory barriers.

>> })
>>
>> I would put the condition of the wait_event into a separate function to make
>> it more readable and not use GCC extension, but essentially that's it.
>>
>> The only problem this could have is that the wptr wraps around multiple
>> times between wake ups. To handle this as well we would need to increment
>> the wrap around counter in amdgpu_ih_process() as well, but this means
>> some extra overhead during IH processing. And then I would also use 64bit
>> counters to make the stuff easier to handle.
> I'm still not fully clear on how 64-bit counters help.  It already looks like you're leveraging the ptr_mask limit of 19/20 bits to increment the unused top bits of the rptr/wptr by adding the rb size.

When you detect the rptr wrap around only on the waiters side there is 
the chance that you miss a wrap around and wait to long.

But I now think that is better than adding overhead to the processing side.

regards,
Christian.

>
> Thanks,
>
> Jon
>
>> Regards,
>> Christian.
>>
>>> Thanks,
>>>
>>> Jon
>>>
>>>> Christian.
>>>>
>>>>> +
>>>>> +     spin_begin();
>>>>> +     while (true) {
>>>>> +             bool rptr_wrapped = false, wptr_wrapped = false;
>>>>> +             u32 rptr, wptr;
>>>>> +
>>>>> +             spin_cpu_relax();
>>>>> +
>>>>> +             /* Update wptr checkpoint/wrap count compare. */
>>>>> +             wptr = amdgpu_ih_get_wptr(adev, ih);
>>>>> +             if (wptr < prev_wptr) {
>>>>> +                     wptr_wrap++;
>>>>> +                     wptr_check = wptr | (wptr_wrap << 32);
>>>>> +                     wptr_wrapped = true;
>>>>> +             }
>>>>> +             prev_wptr = wptr;
>>>>> +
>>>>> +             /* Order wptr with rptr. */
>>>>> +             rmb();
>>>>> +
>>>>> +             /* Update rptr/wrap count compare. */
>>>>> +             rptr = READ_ONCE(ih->rptr);
>>>>> +             if (rptr < prev_rptr) {
>>>>> +                     rptr_wrap++;
>>>>> +                     rptr_wrapped = true;
>>>>> +             }
>>>>> +             rptr_check = rptr | (rptr_wrap << 32);
>>>>> +             prev_rptr = rptr;
>>>>> +
>>>>> +             /* Wrapping occurred so restart. */
>>>>> +             if (rptr_wrapped || wptr_wrapped)
>>>>> +                     continue;
>>>>> +
>>>>> +             /* Exit on reaching or passing checkpoint. */
>>>>> +             if (rptr_check >= wptr_check &&
>>>>> +                                     rptr >= (wptr_check & ih->ptr_mask))
>>>>> +                     break;
>>>>> +     }
>>>>> +     spin_end();
>>>>> +
>>>>> +     return 0;
>>>>> +}
>>>>> +
>>>>>     /**
>>>>>      * amdgpu_ih_process - interrupt handler
>>>>>      *
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
>>>>> index 6ed4a85fc7c3..6817f0a812d2 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
>>>>> @@ -87,6 +87,8 @@ int amdgpu_ih_ring_init(struct amdgpu_device
>>>>> *adev,
>>>> struct amdgpu_ih_ring *ih,
>>>>>     void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct
>>>> amdgpu_ih_ring *ih);
>>>>>     void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t
>> *iv,
>>>>>                           unsigned int num_dw);
>>>>> +int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device
>>>> *adev,
>>>>> +                                     struct amdgpu_ih_ring *ih);
>>>>>     int amdgpu_ih_process(struct amdgpu_device *adev, struct
>>>> amdgpu_ih_ring *ih);
>>>>>     void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
>>>>>                                 struct amdgpu_ih_ring *ih,

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [PATCH] drm/amdgpu: add ih waiter on process until checkpoint
  2021-03-06  9:12     ` Christian König
@ 2021-03-08 21:09       ` Kim, Jonathan
  2021-03-09  9:09         ` Christian König
  0 siblings, 1 reply; 9+ messages in thread
From: Kim, Jonathan @ 2021-03-08 21:09 UTC (permalink / raw)
  To: Koenig, Christian, Christian König, amd-gfx
  Cc: Yang, Philip, Kuehling, Felix

[AMD Official Use Only - Internal Distribution Only]

> -----Original Message-----
> From: Koenig, Christian <Christian.Koenig@amd.com>
> Sent: Saturday, March 6, 2021 4:12 AM
> To: Kim, Jonathan <Jonathan.Kim@amd.com>; Christian König
> <ckoenig.leichtzumerken@gmail.com>; amd-gfx@lists.freedesktop.org
> Cc: Yang, Philip <Philip.Yang@amd.com>; Kuehling, Felix
> <Felix.Kuehling@amd.com>
> Subject: Re: [PATCH] drm/amdgpu: add ih waiter on process until checkpoint
>
>
>
> Am 05.03.21 um 22:34 schrieb Kim, Jonathan:
> > [AMD Official Use Only - Internal Distribution Only]
> >
> >> -----Original Message-----
> >> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> >> Sent: Friday, March 5, 2021 3:18 PM
> >> To: Kim, Jonathan <Jonathan.Kim@amd.com>; amd-
> >> gfx@lists.freedesktop.org
> >> Cc: Yang, Philip <Philip.Yang@amd.com>; Kuehling, Felix
> >> <Felix.Kuehling@amd.com>; Koenig, Christian
> >> <Christian.Koenig@amd.com>
> >> Subject: Re: [PATCH] drm/amdgpu: add ih waiter on process until
> >> checkpoint
> >>
> >> [CAUTION: External Email]
> >>
> >> Am 05.03.21 um 21:11 schrieb Jonathan Kim:
> >>> Add IH function to allow caller to wait until ring entries are
> >>> processed until the checkpoint write pointer.
> >>>
> >>> This will be primarily used by HMM to drain pending page fault
> >>> interrupts before memory unmap to prevent HMM from handling stale
> >> interrupts.
> >>> v2: Update title and description to clarify use.
> >>> Add rptr/wptr wrap counter checks to guarantee ring entries are
> >>> processed until the checkpoint.
> >> First of all as I said please use a wait_event, busy waiting is a clear NAK.
> > Who would do the wake though?  Are you suggesting wake be done in
> amdgpu_ih_process?  Or is waiting happening by the caller and this should go
> somewhere higher (like amdgpu_amdkfd for example)?
> >
> >>> Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
> >>> ---
> >>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 68
> >> +++++++++++++++++++++++++-
> >>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h |  2 +
> >>>    2 files changed, 69 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> >>> index dc852af4f3b7..954518b4fb79 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> >>> @@ -22,7 +22,7 @@
> >>>     */
> >>>
> >>>    #include <linux/dma-mapping.h>
> >>> -
> >>> +#include <linux/processor.h>
> >>>    #include "amdgpu.h"
> >>>    #include "amdgpu_ih.h"
> >>>
> >>> @@ -160,6 +160,72 @@ void amdgpu_ih_ring_write(struct
> >> amdgpu_ih_ring *ih, const uint32_t *iv,
> >>>        }
> >>>    }
> >>>
> >>> +/**
> >>> + * amdgpu_ih_wait_on_checkpoint_process - wait to process IVs up to
> >>> +checkpoint
> >>> + *
> >>> + * @adev: amdgpu_device pointer
> >>> + * @ih: ih ring to process
> >>> + *
> >>> + * Used to ensure ring has processed IVs up to the checkpoint write
> >> pointer.
> >>> + */
> >>> +int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device
> >> *adev,
> >>> +                                     struct amdgpu_ih_ring *ih) {
> >>> +     u64 rptr_check, wptr_check, rptr_wrap = 0, wptr_wrap = 0;
> >>> +     u32 prev_rptr, prev_wptr;
> >>> +
> >>> +     if (!ih->enabled || adev->shutdown)
> >>> +             return -ENODEV;
> >>> +
> >>> +     prev_wptr = amdgpu_ih_get_wptr(adev, ih);
> >>> +     /* Order wptr with rptr. */
> >>> +     rmb();
> >>> +     prev_rptr = READ_ONCE(ih->rptr);
> >>> +     rptr_check = prev_rptr | (rptr_wrap << 32);
> >>> +     wptr_check = prev_wptr | (wptr_wrap << 32);
> >> Hui what? That check doesn't even make remotely sense to me.
> > Can you clarify what you meant by creating a new 64 bit compare?
> > Snip from your last response:
> >
> > "This way you do something like this:
> > 1. get the wrap around counter.
> > 2. get wptr
> > 3. get rptr
> > 4. compare the wrap around counter with the old one, if it has changed
> > start over with #1 5. Use wrap around counter and rptr/wptr to come up
> with 64bit values.
> > 6. Compare wptr with rptr/wrap around counter until we are sure the IHs
> are processed."
> >
> >  From a discussion with Felix, I interpreted this as a way to guarantee
> rptr/wtpr ordering so that rptr monotonically follows wptr per check.
> > I'm assuming rptr/wptrs are 32 bits wide by the use of ptr_mask on
> read/write functions so a respective mask of rptr/wptr wrap count to the top
> 32 bits would mark how far apart the rptr and wptr are per check.
>
> Mhm, sounds like my description was a bit confusing. Let me try again.
>
> First of all rptr/wptr are not 32bit, their maximum is 20 or 19 bits IIRC (and
> they are dw, so 4M or 2M bytes).
>

Thanks Christian.  This makes sense now.  I can see how rptrs advance by dword sets in the iv decode helper.
My apologies, but I'm still a bit confused on the pseudo code below and have a few questions before I give this another go ...

> Then the purpose of the wait_event() is to wait for changes of the rptr, so
> the matching wake_up() should be at the same place as calling
> amdgpu_ih_set_rptr().
>
> My original idea of the wrap around counter assumes that the counter is
> updated in amdgpu_ih_process(). That isn't strictly necessary, but it could be
> better even if it adds some overhead to IH processing.
>
> If you want to implement it like below it should look something like this:
>
> uint32_t rptr, wptr, tmp;
>
> wptr = amdgpu_ih_get_wptr(adev, ih);
> rmb();
> rptr = READ_ONCE(ih->rptr);
>
> if (rptr > wptr)
>      rptr += ih->ptr_mask + 1;

So I think you're trying to be safe by assuming the rptr still needs to overtake the wptr and hasn't done so yet so this is looking ahead by saying the rptr will have to wrap?

>
> wait_event(ig->processing, {
>      tmp = amdgpu_ih_get_wptr(adev, ih);
>      tmp |= wptr & ~ih->ptr_mask;
>      if (tmp < wptr)
>          tmp += ih->ptr_mask + 1;
>      wptr = tmp;

For short term use, I think the caller will try to guarantee that the checkpoint wptr is checked during a time when interrupts won't be generated anymore, but I guess for general use, we can't guarantee this and this is why we're using a moving wptr checkpoint?

>      wptr >= rptr;

Assuming you mean 'return wptr >= rptr' will unblock if true, I can see this for wptr == rptr, but why wptr > rptr?  What happens if the caller waits when nothing is wrapped but there are entries to be processed? (I'm assuming rptr < wptr can be true with entries that still require processing).  Won't the caller advance without waiting?

> })
>
> I would put the condition of the wait_event into a separate function to make
> it more readable and not use GCC extension, but essentially that's it.
>
> The only problem this could have is that the wptr wraps around multiple
> times between wake ups. To handle this as well we would need to increment
> the wrap around counter in amdgpu_ih_process() as well, but this means
> some extra overhead during IH processing. And then I would also use 64bit
> counters to make the stuff easier to handle.

I'm still not fully clear on how 64-bit counters help.  It already looks like you're leveraging the ptr_mask limit of 19/20 bits to increment the unused top bits of the rptr/wptr by adding the rb size.

Thanks,

Jon

>
> Regards,
> Christian.
>
> >
> > Thanks,
> >
> > Jon
> >
> >> Christian.
> >>
> >>> +
> >>> +     spin_begin();
> >>> +     while (true) {
> >>> +             bool rptr_wrapped = false, wptr_wrapped = false;
> >>> +             u32 rptr, wptr;
> >>> +
> >>> +             spin_cpu_relax();
> >>> +
> >>> +             /* Update wptr checkpoint/wrap count compare. */
> >>> +             wptr = amdgpu_ih_get_wptr(adev, ih);
> >>> +             if (wptr < prev_wptr) {
> >>> +                     wptr_wrap++;
> >>> +                     wptr_check = wptr | (wptr_wrap << 32);
> >>> +                     wptr_wrapped = true;
> >>> +             }
> >>> +             prev_wptr = wptr;
> >>> +
> >>> +             /* Order wptr with rptr. */
> >>> +             rmb();
> >>> +
> >>> +             /* Update rptr/wrap count compare. */
> >>> +             rptr = READ_ONCE(ih->rptr);
> >>> +             if (rptr < prev_rptr) {
> >>> +                     rptr_wrap++;
> >>> +                     rptr_wrapped = true;
> >>> +             }
> >>> +             rptr_check = rptr | (rptr_wrap << 32);
> >>> +             prev_rptr = rptr;
> >>> +
> >>> +             /* Wrapping occurred so restart. */
> >>> +             if (rptr_wrapped || wptr_wrapped)
> >>> +                     continue;
> >>> +
> >>> +             /* Exit on reaching or passing checkpoint. */
> >>> +             if (rptr_check >= wptr_check &&
> >>> +                                     rptr >= (wptr_check & ih->ptr_mask))
> >>> +                     break;
> >>> +     }
> >>> +     spin_end();
> >>> +
> >>> +     return 0;
> >>> +}
> >>> +
> >>>    /**
> >>>     * amdgpu_ih_process - interrupt handler
> >>>     *
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> >>> index 6ed4a85fc7c3..6817f0a812d2 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> >>> @@ -87,6 +87,8 @@ int amdgpu_ih_ring_init(struct amdgpu_device
> >>> *adev,
> >> struct amdgpu_ih_ring *ih,
> >>>    void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct
> >> amdgpu_ih_ring *ih);
> >>>    void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t
> *iv,
> >>>                          unsigned int num_dw);
> >>> +int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device
> >> *adev,
> >>> +                                     struct amdgpu_ih_ring *ih);
> >>>    int amdgpu_ih_process(struct amdgpu_device *adev, struct
> >> amdgpu_ih_ring *ih);
> >>>    void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
> >>>                                struct amdgpu_ih_ring *ih,

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: add ih waiter on process until checkpoint
  2021-03-05 21:34   ` Kim, Jonathan
@ 2021-03-06  9:12     ` Christian König
  2021-03-08 21:09       ` Kim, Jonathan
  0 siblings, 1 reply; 9+ messages in thread
From: Christian König @ 2021-03-06  9:12 UTC (permalink / raw)
  To: Kim, Jonathan, Christian König, amd-gfx
  Cc: Yang, Philip, Kuehling, Felix



Am 05.03.21 um 22:34 schrieb Kim, Jonathan:
> [AMD Official Use Only - Internal Distribution Only]
>
>> -----Original Message-----
>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>> Sent: Friday, March 5, 2021 3:18 PM
>> To: Kim, Jonathan <Jonathan.Kim@amd.com>; amd-
>> gfx@lists.freedesktop.org
>> Cc: Yang, Philip <Philip.Yang@amd.com>; Kuehling, Felix
>> <Felix.Kuehling@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>
>> Subject: Re: [PATCH] drm/amdgpu: add ih waiter on process until checkpoint
>>
>> [CAUTION: External Email]
>>
>> Am 05.03.21 um 21:11 schrieb Jonathan Kim:
>>> Add IH function to allow caller to wait until ring entries are
>>> processed until the checkpoint write pointer.
>>>
>>> This will be primarily used by HMM to drain pending page fault
>>> interrupts before memory unmap to prevent HMM from handling stale
>> interrupts.
>>> v2: Update title and description to clarify use.
>>> Add rptr/wptr wrap counter checks to guarantee ring entries are
>>> processed until the checkpoint.
>> First of all as I said please use a wait_event, busy waiting is a clear NAK.
> Who would do the wake though?  Are you suggesting wake be done in amdgpu_ih_process?  Or is waiting happening by the caller and this should go somewhere higher (like amdgpu_amdkfd for example)?
>
>>> Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 68
>> +++++++++++++++++++++++++-
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h |  2 +
>>>    2 files changed, 69 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
>>> index dc852af4f3b7..954518b4fb79 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
>>> @@ -22,7 +22,7 @@
>>>     */
>>>
>>>    #include <linux/dma-mapping.h>
>>> -
>>> +#include <linux/processor.h>
>>>    #include "amdgpu.h"
>>>    #include "amdgpu_ih.h"
>>>
>>> @@ -160,6 +160,72 @@ void amdgpu_ih_ring_write(struct
>> amdgpu_ih_ring *ih, const uint32_t *iv,
>>>        }
>>>    }
>>>
>>> +/**
>>> + * amdgpu_ih_wait_on_checkpoint_process - wait to process IVs up to
>>> +checkpoint
>>> + *
>>> + * @adev: amdgpu_device pointer
>>> + * @ih: ih ring to process
>>> + *
>>> + * Used to ensure ring has processed IVs up to the checkpoint write
>> pointer.
>>> + */
>>> +int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device
>> *adev,
>>> +                                     struct amdgpu_ih_ring *ih) {
>>> +     u64 rptr_check, wptr_check, rptr_wrap = 0, wptr_wrap = 0;
>>> +     u32 prev_rptr, prev_wptr;
>>> +
>>> +     if (!ih->enabled || adev->shutdown)
>>> +             return -ENODEV;
>>> +
>>> +     prev_wptr = amdgpu_ih_get_wptr(adev, ih);
>>> +     /* Order wptr with rptr. */
>>> +     rmb();
>>> +     prev_rptr = READ_ONCE(ih->rptr);
>>> +     rptr_check = prev_rptr | (rptr_wrap << 32);
>>> +     wptr_check = prev_wptr | (wptr_wrap << 32);
>> Hui what? That check doesn't even make remotely sense to me.
> Can you clarify what you meant by creating a new 64 bit compare?
> Snip from your last response:
>
> "This way you do something like this:
> 1. get the wrap around counter.
> 2. get wptr
> 3. get rptr
> 4. compare the wrap around counter with the old one, if it has changed start over with #1
> 5. Use wrap around counter and rptr/wptr to come up with 64bit values.
> 6. Compare wptr with rptr/wrap around counter until we are sure the IHs are processed."
>
>  From a discussion with Felix, I interpreted this as a way to guarantee rptr/wtpr ordering so that rptr monotonically follows wptr per check.
> I'm assuming rptr/wptrs are 32 bits wide by the use of ptr_mask on read/write functions so a respective mask of rptr/wptr wrap count to the top 32 bits would mark how far apart the rptr and wptr are per check.

Mhm, sounds like my description was a bit confusing. Let me try again.

First of all rptr/wptr are not 32bit, their maximum is 20 or 19 bits 
IIRC (and they are dw, so 4M or 2M bytes).

Then the purpose of the wait_event() is to wait for changes of the rptr, 
so the matching wake_up() should be at the same place as calling 
amdgpu_ih_set_rptr().

My original idea of the wrap around counter assumes that the counter is 
updated in amdgpu_ih_process(). That isn't strictly necessary, but it 
could be better even if it adds some overhead to IH processing.

If you want to implement it like below it should look something like this:

uint32_t rptr, wptr, tmp;

wptr = amdgpu_ih_get_wptr(adev, ih);
rmb();
rptr = READ_ONCE(ih->rptr);

if (rptr > wptr)
     rptr += ih->ptr_mask + 1;

wait_event(ig->processing, {
     tmp = amdgpu_ih_get_wptr(adev, ih);
     tmp |= wptr & ~ih->ptr_mask;
     if (tmp < wptr)
         tmp += ih->ptr_mask + 1;
     wptr = tmp;
     wptr >= rptr;
})

I would put the condition of the wait_event into a separate function to 
make it more readable and not use GCC extension, but essentially that's it.

The only problem this could have is that the wptr wraps around multiple 
times between wake ups. To handle this as well we would need to 
increment the wrap around counter in amdgpu_ih_process() as well, but 
this means some extra overhead during IH processing. And then I would 
also use 64bit counters to make the stuff easier to handle.

Regards,
Christian.

>
> Thanks,
>
> Jon
>
>> Christian.
>>
>>> +
>>> +     spin_begin();
>>> +     while (true) {
>>> +             bool rptr_wrapped = false, wptr_wrapped = false;
>>> +             u32 rptr, wptr;
>>> +
>>> +             spin_cpu_relax();
>>> +
>>> +             /* Update wptr checkpoint/wrap count compare. */
>>> +             wptr = amdgpu_ih_get_wptr(adev, ih);
>>> +             if (wptr < prev_wptr) {
>>> +                     wptr_wrap++;
>>> +                     wptr_check = wptr | (wptr_wrap << 32);
>>> +                     wptr_wrapped = true;
>>> +             }
>>> +             prev_wptr = wptr;
>>> +
>>> +             /* Order wptr with rptr. */
>>> +             rmb();
>>> +
>>> +             /* Update rptr/wrap count compare. */
>>> +             rptr = READ_ONCE(ih->rptr);
>>> +             if (rptr < prev_rptr) {
>>> +                     rptr_wrap++;
>>> +                     rptr_wrapped = true;
>>> +             }
>>> +             rptr_check = rptr | (rptr_wrap << 32);
>>> +             prev_rptr = rptr;
>>> +
>>> +             /* Wrapping occurred so restart. */
>>> +             if (rptr_wrapped || wptr_wrapped)
>>> +                     continue;
>>> +
>>> +             /* Exit on reaching or passing checkpoint. */
>>> +             if (rptr_check >= wptr_check &&
>>> +                                     rptr >= (wptr_check & ih->ptr_mask))
>>> +                     break;
>>> +     }
>>> +     spin_end();
>>> +
>>> +     return 0;
>>> +}
>>> +
>>>    /**
>>>     * amdgpu_ih_process - interrupt handler
>>>     *
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
>>> index 6ed4a85fc7c3..6817f0a812d2 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
>>> @@ -87,6 +87,8 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev,
>> struct amdgpu_ih_ring *ih,
>>>    void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct
>> amdgpu_ih_ring *ih);
>>>    void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
>>>                          unsigned int num_dw);
>>> +int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device
>> *adev,
>>> +                                     struct amdgpu_ih_ring *ih);
>>>    int amdgpu_ih_process(struct amdgpu_device *adev, struct
>> amdgpu_ih_ring *ih);
>>>    void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
>>>                                struct amdgpu_ih_ring *ih,

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [PATCH] drm/amdgpu: add ih waiter on process until checkpoint
  2021-03-05 20:17 ` Christian König
@ 2021-03-05 21:34   ` Kim, Jonathan
  2021-03-06  9:12     ` Christian König
  0 siblings, 1 reply; 9+ messages in thread
From: Kim, Jonathan @ 2021-03-05 21:34 UTC (permalink / raw)
  To: Christian König, amd-gfx
  Cc: Yang, Philip, Kuehling, Felix, Koenig, Christian

[AMD Official Use Only - Internal Distribution Only]

> -----Original Message-----
> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> Sent: Friday, March 5, 2021 3:18 PM
> To: Kim, Jonathan <Jonathan.Kim@amd.com>; amd-
> gfx@lists.freedesktop.org
> Cc: Yang, Philip <Philip.Yang@amd.com>; Kuehling, Felix
> <Felix.Kuehling@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>
> Subject: Re: [PATCH] drm/amdgpu: add ih waiter on process until checkpoint
>
> [CAUTION: External Email]
>
> Am 05.03.21 um 21:11 schrieb Jonathan Kim:
> > Add IH function to allow caller to wait until ring entries are
> > processed until the checkpoint write pointer.
> >
> > This will be primarily used by HMM to drain pending page fault
> > interrupts before memory unmap to prevent HMM from handling stale
> interrupts.
> >
> > v2: Update title and description to clarify use.
> > Add rptr/wptr wrap counter checks to guarantee ring entries are
> > processed until the checkpoint.
>
> First of all as I said please use a wait_event, busy waiting is a clear NAK.

Who would do the wake though?  Are you suggesting wake be done in amdgpu_ih_process?  Or is waiting happening by the caller and this should go somewhere higher (like amdgpu_amdkfd for example)?

>
> >
> > Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 68
> +++++++++++++++++++++++++-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h |  2 +
> >   2 files changed, 69 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> > index dc852af4f3b7..954518b4fb79 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> > @@ -22,7 +22,7 @@
> >    */
> >
> >   #include <linux/dma-mapping.h>
> > -
> > +#include <linux/processor.h>
> >   #include "amdgpu.h"
> >   #include "amdgpu_ih.h"
> >
> > @@ -160,6 +160,72 @@ void amdgpu_ih_ring_write(struct
> amdgpu_ih_ring *ih, const uint32_t *iv,
> >       }
> >   }
> >
> > +/**
> > + * amdgpu_ih_wait_on_checkpoint_process - wait to process IVs up to
> > +checkpoint
> > + *
> > + * @adev: amdgpu_device pointer
> > + * @ih: ih ring to process
> > + *
> > + * Used to ensure ring has processed IVs up to the checkpoint write
> pointer.
> > + */
> > +int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device
> *adev,
> > +                                     struct amdgpu_ih_ring *ih) {
> > +     u64 rptr_check, wptr_check, rptr_wrap = 0, wptr_wrap = 0;
> > +     u32 prev_rptr, prev_wptr;
> > +
> > +     if (!ih->enabled || adev->shutdown)
> > +             return -ENODEV;
> > +
> > +     prev_wptr = amdgpu_ih_get_wptr(adev, ih);
> > +     /* Order wptr with rptr. */
> > +     rmb();
> > +     prev_rptr = READ_ONCE(ih->rptr);
> > +     rptr_check = prev_rptr | (rptr_wrap << 32);
> > +     wptr_check = prev_wptr | (wptr_wrap << 32);
>
> Hui what? That check doesn't even make remotely sense to me.

Can you clarify what you meant by creating a new 64 bit compare?
Snip from your last response:

"This way you do something like this:
1. get the wrap around counter.
2. get wptr
3. get rptr
4. compare the wrap around counter with the old one, if it has changed start over with #1
5. Use wrap around counter and rptr/wptr to come up with 64bit values.
6. Compare wptr with rptr/wrap around counter until we are sure the IHs are processed."

From a discussion with Felix, I interpreted this as a way to guarantee rptr/wtpr ordering so that rptr monotonically follows wptr per check.
I'm assuming rptr/wptrs are 32 bits wide by the use of ptr_mask on read/write functions so a respective mask of rptr/wptr wrap count to the top 32 bits would mark how far apart the rptr and wptr are per check.

Thanks,

Jon

>
> Christian.
>
> > +
> > +     spin_begin();
> > +     while (true) {
> > +             bool rptr_wrapped = false, wptr_wrapped = false;
> > +             u32 rptr, wptr;
> > +
> > +             spin_cpu_relax();
> > +
> > +             /* Update wptr checkpoint/wrap count compare. */
> > +             wptr = amdgpu_ih_get_wptr(adev, ih);
> > +             if (wptr < prev_wptr) {
> > +                     wptr_wrap++;
> > +                     wptr_check = wptr | (wptr_wrap << 32);
> > +                     wptr_wrapped = true;
> > +             }
> > +             prev_wptr = wptr;
> > +
> > +             /* Order wptr with rptr. */
> > +             rmb();
> > +
> > +             /* Update rptr/wrap count compare. */
> > +             rptr = READ_ONCE(ih->rptr);
> > +             if (rptr < prev_rptr) {
> > +                     rptr_wrap++;
> > +                     rptr_wrapped = true;
> > +             }
> > +             rptr_check = rptr | (rptr_wrap << 32);
> > +             prev_rptr = rptr;
> > +
> > +             /* Wrapping occurred so restart. */
> > +             if (rptr_wrapped || wptr_wrapped)
> > +                     continue;
> > +
> > +             /* Exit on reaching or passing checkpoint. */
> > +             if (rptr_check >= wptr_check &&
> > +                                     rptr >= (wptr_check & ih->ptr_mask))
> > +                     break;
> > +     }
> > +     spin_end();
> > +
> > +     return 0;
> > +}
> > +
> >   /**
> >    * amdgpu_ih_process - interrupt handler
> >    *
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> > index 6ed4a85fc7c3..6817f0a812d2 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> > @@ -87,6 +87,8 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev,
> struct amdgpu_ih_ring *ih,
> >   void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct
> amdgpu_ih_ring *ih);
> >   void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
> >                         unsigned int num_dw);
> > +int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device
> *adev,
> > +                                     struct amdgpu_ih_ring *ih);
> >   int amdgpu_ih_process(struct amdgpu_device *adev, struct
> amdgpu_ih_ring *ih);
> >   void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
> >                               struct amdgpu_ih_ring *ih,

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu: add ih waiter on process until checkpoint
  2021-03-05 20:11 Jonathan Kim
@ 2021-03-05 20:17 ` Christian König
  2021-03-05 21:34   ` Kim, Jonathan
  0 siblings, 1 reply; 9+ messages in thread
From: Christian König @ 2021-03-05 20:17 UTC (permalink / raw)
  To: Jonathan Kim, amd-gfx; +Cc: Philip.Yang, Felix.Kuehling, Christian.Koenig

Am 05.03.21 um 21:11 schrieb Jonathan Kim:
> Add IH function to allow caller to wait until ring entries are processed
> until the checkpoint write pointer.
>
> This will be primarily used by HMM to drain pending page fault interrupts
> before memory unmap to prevent HMM from handling stale interrupts.
>
> v2: Update title and description to clarify use.
> Add rptr/wptr wrap counter checks to guarantee ring entries are processed
> until the checkpoint.

First of all as I said please use a wait_event, busy waiting is a clear NAK.

>
> Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 68 +++++++++++++++++++++++++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h |  2 +
>   2 files changed, 69 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> index dc852af4f3b7..954518b4fb79 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> @@ -22,7 +22,7 @@
>    */
>   
>   #include <linux/dma-mapping.h>
> -
> +#include <linux/processor.h>
>   #include "amdgpu.h"
>   #include "amdgpu_ih.h"
>   
> @@ -160,6 +160,72 @@ void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
>   	}
>   }
>   
> +/**
> + * amdgpu_ih_wait_on_checkpoint_process - wait to process IVs up to checkpoint
> + *
> + * @adev: amdgpu_device pointer
> + * @ih: ih ring to process
> + *
> + * Used to ensure ring has processed IVs up to the checkpoint write pointer.
> + */
> +int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device *adev,
> +					struct amdgpu_ih_ring *ih)
> +{
> +	u64 rptr_check, wptr_check, rptr_wrap = 0, wptr_wrap = 0;
> +	u32 prev_rptr, prev_wptr;
> +
> +	if (!ih->enabled || adev->shutdown)
> +		return -ENODEV;
> +
> +	prev_wptr = amdgpu_ih_get_wptr(adev, ih);
> +	/* Order wptr with rptr. */
> +	rmb();
> +	prev_rptr = READ_ONCE(ih->rptr);
> +	rptr_check = prev_rptr | (rptr_wrap << 32);
> +	wptr_check = prev_wptr | (wptr_wrap << 32);

Hui what? That check doesn't even make remotely sense to me.

Christian.

> +
> +	spin_begin();
> +	while (true) {
> +		bool rptr_wrapped = false, wptr_wrapped = false;
> +		u32 rptr, wptr;
> +
> +		spin_cpu_relax();
> +
> +		/* Update wptr checkpoint/wrap count compare. */
> +		wptr = amdgpu_ih_get_wptr(adev, ih);
> +		if (wptr < prev_wptr) {
> +			wptr_wrap++;
> +			wptr_check = wptr | (wptr_wrap << 32);
> +			wptr_wrapped = true;
> +		}
> +		prev_wptr = wptr;
> +
> +		/* Order wptr with rptr. */
> +		rmb();
> +
> +		/* Update rptr/wrap count compare. */
> +		rptr = READ_ONCE(ih->rptr);
> +		if (rptr < prev_rptr) {
> +			rptr_wrap++;
> +			rptr_wrapped = true;
> +		}
> +		rptr_check = rptr | (rptr_wrap << 32);
> +		prev_rptr = rptr;
> +
> +		/* Wrapping occurred so restart. */
> +		if (rptr_wrapped || wptr_wrapped)
> +			continue;
> +
> +		/* Exit on reaching or passing checkpoint. */
> +		if (rptr_check >= wptr_check &&
> +					rptr >= (wptr_check & ih->ptr_mask))
> +			break;
> +	}
> +	spin_end();
> +
> +	return 0;
> +}
> +
>   /**
>    * amdgpu_ih_process - interrupt handler
>    *
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> index 6ed4a85fc7c3..6817f0a812d2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> @@ -87,6 +87,8 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
>   void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
>   void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
>   			  unsigned int num_dw);
> +int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device *adev,
> +					struct amdgpu_ih_ring *ih);
>   int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
>   void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
>   				struct amdgpu_ih_ring *ih,

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH] drm/amdgpu: add ih waiter on process until checkpoint
@ 2021-03-05 20:11 Jonathan Kim
  2021-03-05 20:17 ` Christian König
  0 siblings, 1 reply; 9+ messages in thread
From: Jonathan Kim @ 2021-03-05 20:11 UTC (permalink / raw)
  To: amd-gfx; +Cc: Philip.Yang, Felix.Kuehling, Jonathan Kim, Christian.Koenig

Add IH function to allow caller to wait until ring entries are processed
until the checkpoint write pointer.

This will be primarily used by HMM to drain pending page fault interrupts
before memory unmap to prevent HMM from handling stale interrupts.

v2: Update title and description to clarify use.
Add rptr/wptr wrap counter checks to guarantee ring entries are processed
until the checkpoint.

Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 68 +++++++++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h |  2 +
 2 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index dc852af4f3b7..954518b4fb79 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -22,7 +22,7 @@
  */
 
 #include <linux/dma-mapping.h>
-
+#include <linux/processor.h>
 #include "amdgpu.h"
 #include "amdgpu_ih.h"
 
@@ -160,6 +160,72 @@ void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
 	}
 }
 
+/**
+ * amdgpu_ih_wait_on_checkpoint_process - wait to process IVs up to checkpoint
+ *
+ * @adev: amdgpu_device pointer
+ * @ih: ih ring to process
+ *
+ * Used to ensure ring has processed IVs up to the checkpoint write pointer.
+ */
+int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device *adev,
+					struct amdgpu_ih_ring *ih)
+{
+	u64 rptr_check, wptr_check, rptr_wrap = 0, wptr_wrap = 0;
+	u32 prev_rptr, prev_wptr;
+
+	if (!ih->enabled || adev->shutdown)
+		return -ENODEV;
+
+	prev_wptr = amdgpu_ih_get_wptr(adev, ih);
+	/* Order wptr with rptr. */
+	rmb();
+	prev_rptr = READ_ONCE(ih->rptr);
+	rptr_check = prev_rptr | (rptr_wrap << 32);
+	wptr_check = prev_wptr | (wptr_wrap << 32);
+
+	spin_begin();
+	while (true) {
+		bool rptr_wrapped = false, wptr_wrapped = false;
+		u32 rptr, wptr;
+
+		spin_cpu_relax();
+
+		/* Update wptr checkpoint/wrap count compare. */
+		wptr = amdgpu_ih_get_wptr(adev, ih);
+		if (wptr < prev_wptr) {
+			wptr_wrap++;
+			wptr_check = wptr | (wptr_wrap << 32);
+			wptr_wrapped = true;
+		}
+		prev_wptr = wptr;
+
+		/* Order wptr with rptr. */
+		rmb();
+
+		/* Update rptr/wrap count compare. */
+		rptr = READ_ONCE(ih->rptr);
+		if (rptr < prev_rptr) {
+			rptr_wrap++;
+			rptr_wrapped = true;
+		}
+		rptr_check = rptr | (rptr_wrap << 32);
+		prev_rptr = rptr;
+
+		/* Wrapping occurred so restart. */
+		if (rptr_wrapped || wptr_wrapped)
+			continue;
+
+		/* Exit on reaching or passing checkpoint. */
+		if (rptr_check >= wptr_check &&
+					rptr >= (wptr_check & ih->ptr_mask))
+			break;
+	}
+	spin_end();
+
+	return 0;
+}
+
 /**
  * amdgpu_ih_process - interrupt handler
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
index 6ed4a85fc7c3..6817f0a812d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
@@ -87,6 +87,8 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
 void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
 void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
 			  unsigned int num_dw);
+int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device *adev,
+					struct amdgpu_ih_ring *ih);
 int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
 void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
 				struct amdgpu_ih_ring *ih,
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2021-03-10 22:29 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-09 16:19 [PATCH] drm/amdgpu: add ih waiter on process until checkpoint Jonathan Kim
2021-03-09 17:40 ` Christian König
2021-03-10 22:29 ` Felix Kuehling
  -- strict thread matches above, loose matches on Subject: below --
2021-03-05 20:11 Jonathan Kim
2021-03-05 20:17 ` Christian König
2021-03-05 21:34   ` Kim, Jonathan
2021-03-06  9:12     ` Christian König
2021-03-08 21:09       ` Kim, Jonathan
2021-03-09  9:09         ` Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.