* [PATCH] drm/i915: Check for rq->hwsp validity after acquiring RCU lock
@ 2020-12-18  9:19 ` Chris Wilson
  0 siblings, 0 replies; 17+ messages in thread
From: Chris Wilson @ 2020-12-18  9:19 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson, Tvrtko Ursulin, stable

Since we allow removing the timeline map at runtime, there is a risk
that rq->hwsp points into a stale page. To control that risk, we hold
the RCU read lock while reading *rq->hwsp, but we missed a couple of
important barriers. First, the unpinning / removal of the timeline map
must happen after all RCU readers into that map are complete, i.e.
after an RCU grace period (in this case courtesy of call_rcu()).
Secondly, we must make sure that the rq->hwsp we are about to
dereference under the RCU lock is valid. In this case, we make the
rq->hwsp pointer safe during i915_request_retire(), and so we know
that rq->hwsp may become invalid only after the request has been
signaled. Therefore, if the request is not yet signaled when we
acquire rq->hwsp under the RCU read lock, we know that rq->hwsp will
remain valid for the duration of that read-side critical section.

This is a very small window that may lead to either considering the
request not completed (causing a delay until the request is checked
again; any wait for the request is not affected) or dereferencing an
invalid pointer.
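
In condensed form, the two halves of the fix follow this pattern (an
illustrative sketch only; the outer function names are invented here,
while the helpers they call are the real ones touched below):

static void hwsp_cacheline_free_rcu(struct rcu_head *rcu)
{
	struct intel_timeline_cacheline *cl =
		container_of(rcu, typeof(*cl), rcu);

	/* No RCU reader can still be dereferencing *rq->hwsp by now */
	i915_gem_object_unpin_map(cl->hwsp->vma->obj);
	kfree(cl);
}

static void hwsp_cacheline_release(struct intel_timeline_cacheline *cl)
{
	/* Defer the unpin until after the current RCU grace period */
	call_rcu(&cl->rcu, hwsp_cacheline_free_rcu);
}

static bool request_seqno_completed(const struct i915_request *rq)
{
	bool result = true;

	rcu_read_lock(); /* the HWSP may be freed at runtime */
	/*
	 * rq->hwsp is only released after the request has been signaled,
	 * so observing !signaled here guarantees the pointer remains
	 * valid until rcu_read_unlock().
	 */
	if (likely(!i915_request_signaled(rq)))
		result = i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno);
	rcu_read_unlock();

	return result;
}

The signaled test is performed twice in the real helpers: once outside
the RCU section as a fast path, and once again inside so that the
decision to dereference rq->hwsp is made within the read-side critical
section.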

Fixes: 3adac4689f58 ("drm/i915: Introduce concept of per-timeline (context) HWSP")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: <stable@vger.kernel.org> # v5.1+
---
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 11 ++----
 drivers/gpu/drm/i915/gt/intel_timeline.c    |  6 ++--
 drivers/gpu/drm/i915/i915_request.h         | 37 ++++++++++++++++++---
 3 files changed, 39 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 3c62fd6daa76..f96cd7d9b419 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -134,11 +134,6 @@ static bool remove_signaling_context(struct intel_breadcrumbs *b,
 	return true;
 }
 
-static inline bool __request_completed(const struct i915_request *rq)
-{
-	return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno);
-}
-
 __maybe_unused static bool
 check_signal_order(struct intel_context *ce, struct i915_request *rq)
 {
@@ -245,7 +240,7 @@ static void signal_irq_work(struct irq_work *work)
 		list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
 			bool release;
 
-			if (!__request_completed(rq))
+			if (!__i915_request_is_complete(rq))
 				break;
 
 			if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
@@ -380,7 +375,7 @@ static void insert_breadcrumb(struct i915_request *rq)
 	 * straight onto a signaled list, and queue the irq worker for
 	 * its signal completion.
 	 */
-	if (__request_completed(rq)) {
+	if (__i915_request_is_complete(rq)) {
 		irq_signal_request(rq, b);
 		return;
 	}
@@ -468,7 +463,7 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
 	if (release)
 		intel_context_put(ce);
 
-	if (__request_completed(rq))
+	if (__i915_request_is_complete(rq))
 		irq_signal_request(rq, b);
 
 	i915_request_put(rq);
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 512afacd2bdc..a0ce2fb8737a 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -126,6 +126,10 @@ static void __rcu_cacheline_free(struct rcu_head *rcu)
 	struct intel_timeline_cacheline *cl =
 		container_of(rcu, typeof(*cl), rcu);
 
+	/* Must wait until after all *rq->hwsp are complete before removing */
+	i915_gem_object_unpin_map(cl->hwsp->vma->obj);
+	i915_vma_put(cl->hwsp->vma);
+
 	i915_active_fini(&cl->active);
 	kfree(cl);
 }
@@ -134,8 +138,6 @@ static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
 {
 	GEM_BUG_ON(!i915_active_is_idle(&cl->active));
 
-	i915_gem_object_unpin_map(cl->hwsp->vma->obj);
-	i915_vma_put(cl->hwsp->vma);
 	__idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
 
 	call_rcu(&cl->rcu, __rcu_cacheline_free);
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 92e4320c50c4..7c4453e60323 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -440,7 +440,7 @@ static inline u32 hwsp_seqno(const struct i915_request *rq)
 
 static inline bool __i915_request_has_started(const struct i915_request *rq)
 {
-	return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1);
+	return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno - 1);
 }
 
 /**
@@ -471,11 +471,19 @@ static inline bool __i915_request_has_started(const struct i915_request *rq)
  */
 static inline bool i915_request_started(const struct i915_request *rq)
 {
+	bool result;
+
 	if (i915_request_signaled(rq))
 		return true;
 
-	/* Remember: started but may have since been preempted! */
-	return __i915_request_has_started(rq);
+	result = true;
+	rcu_read_lock(); /* the HWSP may be freed at runtime */
+	if (likely(!i915_request_signaled(rq)))
+		/* Remember: started but may have since been preempted! */
+		result = __i915_request_has_started(rq);
+	rcu_read_unlock();
+
+	return result;
 }
 
 /**
@@ -488,10 +496,16 @@ static inline bool i915_request_started(const struct i915_request *rq)
  */
 static inline bool i915_request_is_running(const struct i915_request *rq)
 {
+	bool result;
+
 	if (!i915_request_is_active(rq))
 		return false;
 
-	return __i915_request_has_started(rq);
+	rcu_read_lock();
+	result = __i915_request_has_started(rq) && i915_request_is_active(rq);
+	rcu_read_unlock();
+
+	return result;
 }
 
 /**
@@ -515,12 +529,25 @@ static inline bool i915_request_is_ready(const struct i915_request *rq)
 	return !list_empty(&rq->sched.link);
 }
 
+static inline bool __i915_request_is_complete(const struct i915_request *rq)
+{
+	return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno);
+}
+
 static inline bool i915_request_completed(const struct i915_request *rq)
 {
+	bool result;
+
 	if (i915_request_signaled(rq))
 		return true;
 
-	return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno);
+	result = true;
+	rcu_read_lock(); /* the HWSP may be freed at runtime */
+	if (likely(!i915_request_signaled(rq)))
+		result = __i915_request_is_complete(rq);
+	rcu_read_unlock();
+
+	return result;
 }
 
 static inline void i915_request_mark_complete(struct i915_request *rq)
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH] drm/i915: Check for rq->hwsp validity after acquiring RCU lock
  2020-12-18  9:19 ` [Intel-gfx] " Chris Wilson
@ 2020-12-18 12:10   ` Chris Wilson
  -1 siblings, 0 replies; 17+ messages in thread
From: Chris Wilson @ 2020-12-18 12:10 UTC (permalink / raw)
  To: intel-gfx; +Cc: Tvrtko Ursulin, stable

Quoting Chris Wilson (2020-12-18 09:19:44)
> Since we allow removing the timeline map at runtime, there is a risk
> that rq->hwsp points into a stale page. To control that risk, we hold
> the RCU read lock while reading *rq->hwsp, but we missed a couple of
> important barriers. First, the unpinning / removal of the timeline map
> must happen after all RCU readers into that map are complete, i.e.
> after an RCU grace period (in this case courtesy of call_rcu()).
> Secondly, we must make sure that the rq->hwsp we are about to
> dereference under the RCU lock is valid. In this case, we make the
> rq->hwsp pointer safe during i915_request_retire(), and so we know
> that rq->hwsp may become invalid only after the request has been
> signaled. Therefore, if the request is not yet signaled when we
> acquire rq->hwsp under the RCU read lock, we know that rq->hwsp will
> remain valid for the duration of that read-side critical section.
> 
> This is a very small window that may lead to either considering the
> request not completed (causing a delay until the request is checked
> again; any wait for the request is not affected) or dereferencing an
> invalid pointer.
> 
> Fixes: 3adac4689f58 ("drm/i915: Introduce concept of per-timeline (context) HWSP")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: <stable@vger.kernel.org> # v5.1+
> ---
>  drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 11 ++----
>  drivers/gpu/drm/i915/gt/intel_timeline.c    |  6 ++--
>  drivers/gpu/drm/i915/i915_request.h         | 37 ++++++++++++++++++---
>  3 files changed, 39 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> index 3c62fd6daa76..f96cd7d9b419 100644
> --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> @@ -134,11 +134,6 @@ static bool remove_signaling_context(struct intel_breadcrumbs *b,
>         return true;
>  }
>  
> -static inline bool __request_completed(const struct i915_request *rq)
> -{
> -       return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno);
> -}
> -
>  __maybe_unused static bool
>  check_signal_order(struct intel_context *ce, struct i915_request *rq)
>  {
> @@ -245,7 +240,7 @@ static void signal_irq_work(struct irq_work *work)
>                 list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
>                         bool release;
>  
> -                       if (!__request_completed(rq))
> +                       if (!__i915_request_is_complete(rq))
>                                 break;
>  
>                         if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
> @@ -380,7 +375,7 @@ static void insert_breadcrumb(struct i915_request *rq)
>          * straight onto a signaled list, and queue the irq worker for
>          * its signal completion.
>          */
> -       if (__request_completed(rq)) {
> +       if (__i915_request_is_complete(rq)) {
>                 irq_signal_request(rq, b);
>                 return;
>         }
> @@ -468,7 +463,7 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
>         if (release)
>                 intel_context_put(ce);
>  
> -       if (__request_completed(rq))
> +       if (__i915_request_is_complete(rq))
>                 irq_signal_request(rq, b);
>  
>         i915_request_put(rq);
> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
> index 512afacd2bdc..a0ce2fb8737a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_timeline.c
> +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
> @@ -126,6 +126,10 @@ static void __rcu_cacheline_free(struct rcu_head *rcu)
>         struct intel_timeline_cacheline *cl =
>                 container_of(rcu, typeof(*cl), rcu);
>  
> +       /* Must wait until after all *rq->hwsp are complete before removing */
> +       i915_gem_object_unpin_map(cl->hwsp->vma->obj);
> +       i915_vma_put(cl->hwsp->vma);
> +
>         i915_active_fini(&cl->active);
>         kfree(cl);
>  }
> @@ -134,8 +138,6 @@ static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
>  {
>         GEM_BUG_ON(!i915_active_is_idle(&cl->active));
>  
> -       i915_gem_object_unpin_map(cl->hwsp->vma->obj);
> -       i915_vma_put(cl->hwsp->vma);
>         __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));

I was thinking this was just marking it as being available, but no, it
really does free.
-Chris

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v2] drm/i915: Check for rq->hwsp validity after acquiring RCU lock
  2020-12-18  9:19 ` [Intel-gfx] " Chris Wilson
@ 2020-12-18 12:24   ` Chris Wilson
  -1 siblings, 0 replies; 17+ messages in thread
From: Chris Wilson @ 2020-12-18 12:24 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson, Tvrtko Ursulin, stable

Since we allow removing the timeline map at runtime, there is a risk
that rq->hwsp points into a stale page. To control that risk, we hold
the RCU read lock while reading *rq->hwsp, but we missed a couple of
important barriers. First, the unpinning / removal of the timeline map
must happen after all RCU readers into that map are complete, i.e.
after an RCU grace period (in this case courtesy of call_rcu()).
Secondly, we must make sure that the rq->hwsp we are about to
dereference under the RCU lock is valid. In this case, we make the
rq->hwsp pointer safe during i915_request_retire(), and so we know
that rq->hwsp may become invalid only after the request has been
signaled. Therefore, if the request is not yet signaled when we
acquire rq->hwsp under the RCU read lock, we know that rq->hwsp will
remain valid for the duration of that read-side critical section.

This is a very small window that may lead to either considering the
request not completed (causing a delay until the request is checked
again; any wait for the request is not affected) or dereferencing an
invalid pointer.
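
With this version the entire teardown of a cacheline is pushed past the
grace period; condensed from the diff below, the idle path becomes:

static void __rcu_cacheline_free(struct rcu_head *rcu)
{
	struct intel_timeline_cacheline *cl =
		container_of(rcu, typeof(*cl), rcu);

	/* Must wait until after all *rq->hwsp are complete before removing */
	i915_gem_object_unpin_map(cl->hwsp->vma->obj);
	__idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));

	i915_active_fini(&cl->active);
	kfree(cl);
}

static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
{
	GEM_BUG_ON(!i915_active_is_idle(&cl->active));
	call_rcu(&cl->rcu, __rcu_cacheline_free);
}

so neither the hwsp map nor the hwsp itself can be released while an
RCU reader may still be inspecting *rq->hwsp.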

Fixes: 3adac4689f58 ("drm/i915: Introduce concept of per-timeline (context) HWSP")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: <stable@vger.kernel.org> # v5.1+
---
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 11 ++----
 drivers/gpu/drm/i915/gt/intel_timeline.c    | 10 +++---
 drivers/gpu/drm/i915/i915_request.h         | 37 ++++++++++++++++++---
 3 files changed, 39 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 3c62fd6daa76..f96cd7d9b419 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -134,11 +134,6 @@ static bool remove_signaling_context(struct intel_breadcrumbs *b,
 	return true;
 }
 
-static inline bool __request_completed(const struct i915_request *rq)
-{
-	return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno);
-}
-
 __maybe_unused static bool
 check_signal_order(struct intel_context *ce, struct i915_request *rq)
 {
@@ -245,7 +240,7 @@ static void signal_irq_work(struct irq_work *work)
 		list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
 			bool release;
 
-			if (!__request_completed(rq))
+			if (!__i915_request_is_complete(rq))
 				break;
 
 			if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
@@ -380,7 +375,7 @@ static void insert_breadcrumb(struct i915_request *rq)
 	 * straight onto a signaled list, and queue the irq worker for
 	 * its signal completion.
 	 */
-	if (__request_completed(rq)) {
+	if (__i915_request_is_complete(rq)) {
 		irq_signal_request(rq, b);
 		return;
 	}
@@ -468,7 +463,7 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
 	if (release)
 		intel_context_put(ce);
 
-	if (__request_completed(rq))
+	if (__i915_request_is_complete(rq))
 		irq_signal_request(rq, b);
 
 	i915_request_put(rq);
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 512afacd2bdc..a005d0165bf4 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -126,6 +126,10 @@ static void __rcu_cacheline_free(struct rcu_head *rcu)
 	struct intel_timeline_cacheline *cl =
 		container_of(rcu, typeof(*cl), rcu);
 
+	/* Must wait until after all *rq->hwsp are complete before removing */
+	i915_gem_object_unpin_map(cl->hwsp->vma->obj);
+	__idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
+
 	i915_active_fini(&cl->active);
 	kfree(cl);
 }
@@ -133,11 +137,6 @@ static void __rcu_cacheline_free(struct rcu_head *rcu)
 static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
 {
 	GEM_BUG_ON(!i915_active_is_idle(&cl->active));
-
-	i915_gem_object_unpin_map(cl->hwsp->vma->obj);
-	i915_vma_put(cl->hwsp->vma);
-	__idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
-
 	call_rcu(&cl->rcu, __rcu_cacheline_free);
 }
 
@@ -179,7 +178,6 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
 		return ERR_CAST(vaddr);
 	}
 
-	i915_vma_get(hwsp->vma);
 	cl->hwsp = hwsp;
 	cl->vaddr = page_pack_bits(vaddr, cacheline);
 
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 92e4320c50c4..7c4453e60323 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -440,7 +440,7 @@ static inline u32 hwsp_seqno(const struct i915_request *rq)
 
 static inline bool __i915_request_has_started(const struct i915_request *rq)
 {
-	return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1);
+	return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno - 1);
 }
 
 /**
@@ -471,11 +471,19 @@ static inline bool __i915_request_has_started(const struct i915_request *rq)
  */
 static inline bool i915_request_started(const struct i915_request *rq)
 {
+	bool result;
+
 	if (i915_request_signaled(rq))
 		return true;
 
-	/* Remember: started but may have since been preempted! */
-	return __i915_request_has_started(rq);
+	result = true;
+	rcu_read_lock(); /* the HWSP may be freed at runtime */
+	if (likely(!i915_request_signaled(rq)))
+		/* Remember: started but may have since been preempted! */
+		result = __i915_request_has_started(rq);
+	rcu_read_unlock();
+
+	return result;
 }
 
 /**
@@ -488,10 +496,16 @@ static inline bool i915_request_started(const struct i915_request *rq)
  */
 static inline bool i915_request_is_running(const struct i915_request *rq)
 {
+	bool result;
+
 	if (!i915_request_is_active(rq))
 		return false;
 
-	return __i915_request_has_started(rq);
+	rcu_read_lock();
+	result = __i915_request_has_started(rq) && i915_request_is_active(rq);
+	rcu_read_unlock();
+
+	return result;
 }
 
 /**
@@ -515,12 +529,25 @@ static inline bool i915_request_is_ready(const struct i915_request *rq)
 	return !list_empty(&rq->sched.link);
 }
 
+static inline bool __i915_request_is_complete(const struct i915_request *rq)
+{
+	return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno);
+}
+
 static inline bool i915_request_completed(const struct i915_request *rq)
 {
+	bool result;
+
 	if (i915_request_signaled(rq))
 		return true;
 
-	return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno);
+	result = true;
+	rcu_read_lock(); /* the HWSP may be freed at runtime */
+	if (likely(!i915_request_signaled(rq)))
+		result = __i915_request_is_complete(rq);
+	rcu_read_unlock();
+
+	return result;
 }
 
 static inline void i915_request_mark_complete(struct i915_request *rq)
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/i915: Check for rq->hwsp validity after acquiring RCU lock
  2020-12-18  9:19 ` [Intel-gfx] " Chris Wilson
                   ` (2 preceding siblings ...)
  (?)
@ 2020-12-18 12:29 ` Patchwork
  -1 siblings, 0 replies; 17+ messages in thread
From: Patchwork @ 2020-12-18 12:29 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Check for rq->hwsp validity after acquiring RCU lock
URL   : https://patchwork.freedesktop.org/series/85071/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
eda31dae2517 drm/i915: Check for rq->hwsp validity after acquiring RCU lock
-:28: WARNING:BAD_SIGN_OFF: email address '<stable@vger.kernel.org> # v5.1+' might be better as 'stable@vger.kernel.org# v5.1+'
#28: 
Cc: <stable@vger.kernel.org> # v5.1+

total: 0 errors, 1 warnings, 0 checks, 125 lines checked

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Intel-gfx] ✗ Fi.CI.BAT: failure for drm/i915: Check for rq->hwsp validity after acquiring RCU lock
  2020-12-18  9:19 ` [Intel-gfx] " Chris Wilson
                   ` (3 preceding siblings ...)
  (?)
@ 2020-12-18 12:48 ` Patchwork
  -1 siblings, 0 replies; 17+ messages in thread
From: Patchwork @ 2020-12-18 12:48 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx



== Series Details ==

Series: drm/i915: Check for rq->hwsp validity after acquiring RCU lock
URL   : https://patchwork.freedesktop.org/series/85071/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_9503 -> Patchwork_19175
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_19175 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_19175, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19175/index.html

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_19175:

### IGT changes ###

#### Possible regressions ####

  * igt@i915_selftest@live@gt_timelines:
    - fi-ivb-3770:        [PASS][1] -> [INCOMPLETE][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/fi-ivb-3770/igt@i915_selftest@live@gt_timelines.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19175/fi-ivb-3770/igt@i915_selftest@live@gt_timelines.html
    - fi-snb-2520m:       [PASS][3] -> [INCOMPLETE][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/fi-snb-2520m/igt@i915_selftest@live@gt_timelines.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19175/fi-snb-2520m/igt@i915_selftest@live@gt_timelines.html
    - fi-byt-j1900:       [PASS][5] -> [INCOMPLETE][6]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/fi-byt-j1900/igt@i915_selftest@live@gt_timelines.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19175/fi-byt-j1900/igt@i915_selftest@live@gt_timelines.html
    - fi-hsw-4770:        [PASS][7] -> [INCOMPLETE][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/fi-hsw-4770/igt@i915_selftest@live@gt_timelines.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19175/fi-hsw-4770/igt@i915_selftest@live@gt_timelines.html
    - fi-bwr-2160:        [PASS][9] -> [INCOMPLETE][10]
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/fi-bwr-2160/igt@i915_selftest@live@gt_timelines.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19175/fi-bwr-2160/igt@i915_selftest@live@gt_timelines.html
    - fi-snb-2600:        [PASS][11] -> [INCOMPLETE][12]
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/fi-snb-2600/igt@i915_selftest@live@gt_timelines.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19175/fi-snb-2600/igt@i915_selftest@live@gt_timelines.html
    - fi-ilk-650:         [PASS][13] -> [INCOMPLETE][14]
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/fi-ilk-650/igt@i915_selftest@live@gt_timelines.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19175/fi-ilk-650/igt@i915_selftest@live@gt_timelines.html
    - fi-elk-e7500:       [PASS][15] -> [INCOMPLETE][16]
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/fi-elk-e7500/igt@i915_selftest@live@gt_timelines.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19175/fi-elk-e7500/igt@i915_selftest@live@gt_timelines.html

  
Known issues
------------

  Here are the changes found in Patchwork_19175 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_selftest@live@gt_timelines:
    - fi-pnv-d510:        [PASS][17] -> [INCOMPLETE][18] ([i915#299])
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/fi-pnv-d510/igt@i915_selftest@live@gt_timelines.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19175/fi-pnv-d510/igt@i915_selftest@live@gt_timelines.html
    - fi-gdg-551:         [PASS][19] -> [INCOMPLETE][20] ([i915#172])
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/fi-gdg-551/igt@i915_selftest@live@gt_timelines.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19175/fi-gdg-551/igt@i915_selftest@live@gt_timelines.html

  
  [i915#172]: https://gitlab.freedesktop.org/drm/intel/issues/172
  [i915#299]: https://gitlab.freedesktop.org/drm/intel/issues/299


Participating hosts (42 -> 11)
------------------------------

  ERROR: It appears as if the changes made in Patchwork_19175 prevented too many machines from booting.

  Missing    (31): fi-kbl-soraka fi-bdw-gvtdvm fi-icl-u2 fi-apl-guc fi-icl-y fi-skl-6600u fi-cml-u2 fi-bxt-dsi fi-bdw-5557u fi-cml-s fi-bsw-n3050 fi-tgl-u2 fi-glk-dsi fi-kbl-7500u fi-tgl-y fi-bsw-nick fi-skl-6700k2 fi-kbl-r fi-ilk-m540 fi-ehl-1 fi-tgl-dsi fi-skl-guc fi-cfl-8700k fi-hsw-4200u fi-cfl-guc fi-kbl-guc fi-kbl-x1275 fi-cfl-8109u fi-kbl-8809g fi-bsw-kefka fi-bdw-samus 


Build changes
-------------

  * Linux: CI_DRM_9503 -> Patchwork_19175

  CI-20190529: 20190529
  CI_DRM_9503: 82c5c0ad8d578504865837b2135b60dd2d0054a1 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5909: 3d6caf71a3e988cd125eb9efdd0a7cdcd0451673 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_19175: eda31dae251767aa255796fbca58a83c9f523f0a @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

eda31dae2517 drm/i915: Check for rq->hwsp validity after acquiring RCU lock

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19175/index.html


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/i915: Check for rq->hwsp validity after acquiring RCU lock (rev2)
  2020-12-18  9:19 ` [Intel-gfx] " Chris Wilson
                   ` (4 preceding siblings ...)
  (?)
@ 2020-12-18 13:26 ` Patchwork
  -1 siblings, 0 replies; 17+ messages in thread
From: Patchwork @ 2020-12-18 13:26 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Check for rq->hwsp validity after acquiring RCU lock (rev2)
URL   : https://patchwork.freedesktop.org/series/85071/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
a93ba5a18bf8 drm/i915: Check for rq->hwsp validity after acquiring RCU lock
-:28: WARNING:BAD_SIGN_OFF: email address '<stable@vger.kernel.org> # v5.1+' might be better as 'stable@vger.kernel.org# v5.1+'
#28: 
Cc: <stable@vger.kernel.org> # v5.1+

total: 0 errors, 1 warnings, 0 checks, 135 lines checked

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Intel-gfx] ✓ Fi.CI.BAT: success for drm/i915: Check for rq->hwsp validity after acquiring RCU lock (rev2)
  2020-12-18  9:19 ` [Intel-gfx] " Chris Wilson
                   ` (5 preceding siblings ...)
  (?)
@ 2020-12-18 13:56 ` Patchwork
  -1 siblings, 0 replies; 17+ messages in thread
From: Patchwork @ 2020-12-18 13:56 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx



== Series Details ==

Series: drm/i915: Check for rq->hwsp validity after acquiring RCU lock (rev2)
URL   : https://patchwork.freedesktop.org/series/85071/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_9503 -> Patchwork_19178
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/index.html

Known issues
------------

  Here are the changes found in Patchwork_19178 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@debugfs_test@read_all_entries:
    - fi-tgl-y:           [PASS][1] -> [DMESG-WARN][2] ([i915#402]) +2 similar issues
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/fi-tgl-y/igt@debugfs_test@read_all_entries.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/fi-tgl-y/igt@debugfs_test@read_all_entries.html

  * igt@kms_chamelium@dp-crc-fast:
    - fi-cml-u2:          [PASS][3] -> [FAIL][4] ([i915#1161] / [i915#262])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/fi-cml-u2/igt@kms_chamelium@dp-crc-fast.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/fi-cml-u2/igt@kms_chamelium@dp-crc-fast.html

  * igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-b:
    - fi-cfl-8109u:       [PASS][5] -> [DMESG-WARN][6] ([i915#165]) +15 similar issues
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/fi-cfl-8109u/igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-b.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/fi-cfl-8109u/igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-b.html

  
#### Possible fixes ####

  * igt@i915_selftest@live@active:
    - fi-kbl-r:           [DMESG-FAIL][7] ([i915#2291] / [i915#666]) -> [PASS][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/fi-kbl-r/igt@i915_selftest@live@active.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/fi-kbl-r/igt@i915_selftest@live@active.html

  * igt@prime_self_import@basic-with_one_bo_two_files:
    - fi-tgl-y:           [DMESG-WARN][9] ([i915#402]) -> [PASS][10] +1 similar issue
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/fi-tgl-y/igt@prime_self_import@basic-with_one_bo_two_files.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/fi-tgl-y/igt@prime_self_import@basic-with_one_bo_two_files.html

  
  [i915#1161]: https://gitlab.freedesktop.org/drm/intel/issues/1161
  [i915#165]: https://gitlab.freedesktop.org/drm/intel/issues/165
  [i915#2291]: https://gitlab.freedesktop.org/drm/intel/issues/2291
  [i915#262]: https://gitlab.freedesktop.org/drm/intel/issues/262
  [i915#402]: https://gitlab.freedesktop.org/drm/intel/issues/402
  [i915#666]: https://gitlab.freedesktop.org/drm/intel/issues/666


Participating hosts (42 -> 39)
------------------------------

  Missing    (3): fi-ilk-m540 fi-bdw-samus fi-hsw-4200u 


Build changes
-------------

  * Linux: CI_DRM_9503 -> Patchwork_19178

  CI-20190529: 20190529
  CI_DRM_9503: 82c5c0ad8d578504865837b2135b60dd2d0054a1 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5909: 3d6caf71a3e988cd125eb9efdd0a7cdcd0451673 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_19178: a93ba5a18bf8acda650027601343a1bdc52b84ec @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

a93ba5a18bf8 drm/i915: Check for rq->hwsp validity after acquiring RCU lock

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/index.html


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Intel-gfx] [PATCH v2] drm/i915: Check for rq->hwsp validity after acquiring RCU lock
  2020-12-18 12:24   ` [Intel-gfx] " Chris Wilson
@ 2020-12-18 15:52     ` Tvrtko Ursulin
  -1 siblings, 0 replies; 17+ messages in thread
From: Tvrtko Ursulin @ 2020-12-18 15:52 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: stable


On 18/12/2020 12:24, Chris Wilson wrote:
> Since we allow removing the timeline map at runtime, there is a risk
> that rq->hwsp points into a stale page. To control that risk, we hold
> the RCU read lock while reading *rq->hwsp, but we missed a couple of
> important barriers. First, the unpinning / removal of the timeline map
> must happen after all RCU readers into that map are complete, i.e.
> after an RCU grace period (in this case courtesy of call_rcu()).
> Secondly, we must make sure that the rq->hwsp we are about to
> dereference under the RCU lock is valid. In this case, we make the
> rq->hwsp pointer safe during i915_request_retire(), and so we know
> that rq->hwsp may become invalid only after the request has been
> signaled. Therefore, if the request is not yet signaled when we
> acquire rq->hwsp under the RCU read lock, we know that rq->hwsp will
> remain valid for the duration of that read-side critical section.
> 
> This is a very small window that may lead to either considering the
> request not completed (causing a delay until the request is checked
> again; any wait for the request is not affected) or dereferencing an
> invalid pointer.
> 
> Fixes: 3adac4689f58 ("drm/i915: Introduce concept of per-timeline (context) HWSP")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: <stable@vger.kernel.org> # v5.1+
> ---
>   drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 11 ++----
>   drivers/gpu/drm/i915/gt/intel_timeline.c    | 10 +++---
>   drivers/gpu/drm/i915/i915_request.h         | 37 ++++++++++++++++++---
>   3 files changed, 39 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> index 3c62fd6daa76..f96cd7d9b419 100644
> --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> @@ -134,11 +134,6 @@ static bool remove_signaling_context(struct intel_breadcrumbs *b,
>   	return true;
>   }
>   
> -static inline bool __request_completed(const struct i915_request *rq)
> -{
> -	return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno);
> -}
> -
>   __maybe_unused static bool
>   check_signal_order(struct intel_context *ce, struct i915_request *rq)
>   {
> @@ -245,7 +240,7 @@ static void signal_irq_work(struct irq_work *work)
>   		list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
>   			bool release;
>   
> -			if (!__request_completed(rq))
> +			if (!__i915_request_is_complete(rq))
>   				break;
>   
>   			if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
> @@ -380,7 +375,7 @@ static void insert_breadcrumb(struct i915_request *rq)
>   	 * straight onto a signaled list, and queue the irq worker for
>   	 * its signal completion.
>   	 */
> -	if (__request_completed(rq)) {
> +	if (__i915_request_is_complete(rq)) {
>   		irq_signal_request(rq, b);
>   		return;
>   	}
> @@ -468,7 +463,7 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
>   	if (release)
>   		intel_context_put(ce);
>   
> -	if (__request_completed(rq))
> +	if (__i915_request_is_complete(rq))
>   		irq_signal_request(rq, b);
>   
>   	i915_request_put(rq);
> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
> index 512afacd2bdc..a005d0165bf4 100644
> --- a/drivers/gpu/drm/i915/gt/intel_timeline.c
> +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
> @@ -126,6 +126,10 @@ static void __rcu_cacheline_free(struct rcu_head *rcu)
>   	struct intel_timeline_cacheline *cl =
>   		container_of(rcu, typeof(*cl), rcu);
>   
> +	/* Must wait until after all *rq->hwsp are complete before removing */
> +	i915_gem_object_unpin_map(cl->hwsp->vma->obj);
> +	__idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
> +
>   	i915_active_fini(&cl->active);
>   	kfree(cl);
>   }
> @@ -133,11 +137,6 @@ static void __rcu_cacheline_free(struct rcu_head *rcu)
>   static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
>   {
>   	GEM_BUG_ON(!i915_active_is_idle(&cl->active));
> -
> -	i915_gem_object_unpin_map(cl->hwsp->vma->obj);
> -	i915_vma_put(cl->hwsp->vma);
> -	__idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
> -
>   	call_rcu(&cl->rcu, __rcu_cacheline_free);
>   }
>   
> @@ -179,7 +178,6 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
>   		return ERR_CAST(vaddr);
>   	}
>   
> -	i915_vma_get(hwsp->vma);
>   	cl->hwsp = hwsp;
>   	cl->vaddr = page_pack_bits(vaddr, cacheline);
>   
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index 92e4320c50c4..7c4453e60323 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -440,7 +440,7 @@ static inline u32 hwsp_seqno(const struct i915_request *rq)
>   
>   static inline bool __i915_request_has_started(const struct i915_request *rq)
>   {
> -	return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1);
> +	return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno - 1);
>   }
>   
>   /**
> @@ -471,11 +471,19 @@ static inline bool __i915_request_has_started(const struct i915_request *rq)
>    */
>   static inline bool i915_request_started(const struct i915_request *rq)
>   {
> +	bool result;
> +
>   	if (i915_request_signaled(rq))
>   		return true;
>   
> -	/* Remember: started but may have since been preempted! */
> -	return __i915_request_has_started(rq);
> +	result = true;
> +	rcu_read_lock(); /* the HWSP may be freed at runtime */
> +	if (likely(!i915_request_signaled(rq)))
> +		/* Remember: started but may have since been preempted! */
> +		result = __i915_request_has_started(rq);
> +	rcu_read_unlock();
> +
> +	return result;
>   }
>   
>   /**
> @@ -488,10 +496,16 @@ static inline bool i915_request_started(const struct i915_request *rq)
>    */
>   static inline bool i915_request_is_running(const struct i915_request *rq)
>   {
> +	bool result;
> +
>   	if (!i915_request_is_active(rq))
>   		return false;
>   
> -	return __i915_request_has_started(rq);
> +	rcu_read_lock();
> +	result = __i915_request_has_started(rq) && i915_request_is_active(rq);
> +	rcu_read_unlock();
> +
> +	return result;
>   }
>   
>   /**
> @@ -515,12 +529,25 @@ static inline bool i915_request_is_ready(const struct i915_request *rq)
>   	return !list_empty(&rq->sched.link);
>   }
>   
> +static inline bool __i915_request_is_complete(const struct i915_request *rq)
> +{
> +	return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno);
> +}
> +
>   static inline bool i915_request_completed(const struct i915_request *rq)
>   {
> +	bool result;
> +
>   	if (i915_request_signaled(rq))
>   		return true;
>   
> -	return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno);
> +	result = true;
> +	rcu_read_lock(); /* the HWSP may be freed at runtime */
> +	if (likely(!i915_request_signaled(rq)))
> +		result = __i915_request_is_complete(rq);
> +	rcu_read_unlock();
> +
> +	return result;
>   }
>   
>   static inline void i915_request_mark_complete(struct i915_request *rq)
> 

Should rq->hwsp_seqno be marked as rcu pointer?

We reset the fence signaled status before re-assigning the timeline. So 
if we were to query a request in the process of being allocated, do we 
need to do something extra to make sure !signaled status is not seen 
before the rq->hwsp_seqno is replaced? Like should the order of re-init 
be inverted?
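
To make the ordering concern concrete, here is a minimal illustrative
sketch; the struct and function names are hypothetical and this is not
the actual i915 re-init path, only the barrier primitives are real
kernel APIs. If the new HWSP pointer is published with release semantics
before the signaled state is cleared, a reader that sees !signaled under
the matching acquire is also guaranteed to see the new pointer:

#include <linux/compiler.h>
#include <linux/types.h>
#include <asm/barrier.h>

struct example_request {
	bool signaled;			/* stand-in for the fence signaled bit */
	const u32 *hwsp_seqno;		/* points into a timeline HWSP page */
};

/* writer: re-initialising a recycled request for a new timeline */
static void example_reinit(struct example_request *rq, const u32 *new_hwsp)
{
	WRITE_ONCE(rq->hwsp_seqno, new_hwsp);
	/* publish the new pointer before readers can observe !signaled */
	smp_store_release(&rq->signaled, false);
}

/* reader: only dereference hwsp_seqno after checking the signaled state */
static bool example_completed(const struct example_request *rq, u32 seqno)
{
	const u32 *hwsp;

	if (smp_load_acquire(&rq->signaled))
		return true;

	/* pairs with the release above: !signaled implies the new pointer */
	hwsp = READ_ONCE(rq->hwsp_seqno);
	return (s32)(READ_ONCE(*hwsp) - seqno) >= 0;
}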

Regards,

Tvrtko

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Intel-gfx] [PATCH v2] drm/i915: Check for rq->hwsp validity after acquiring RCU lock
  2020-12-18 15:52     ` Tvrtko Ursulin
@ 2020-12-18 16:07       ` Chris Wilson
  -1 siblings, 0 replies; 17+ messages in thread
From: Chris Wilson @ 2020-12-18 16:07 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: stable

Quoting Tvrtko Ursulin (2020-12-18 15:52:05)
> 
> On 18/12/2020 12:24, Chris Wilson wrote:
> > Since we allow removing the timeline map at runtime, there is a risk
> > that rq->hwsp points into a stale page. To control that risk, we hold
> > the RCU read lock while reading *rq->hwsp, but we missed a couple of
> > important barriers. First, the unpinning / removal of the timeline map
> > must be after all RCU readers into that map are complete, i.e. after an
> > rcu barrier (in this case courtesy of call_rcu()). Secondly, we must
> > make sure that the rq->hwsp we are about to dereference under the RCU
> > lock is valid. In this case, we make the rq->hwsp pointer safe during
> > i915_request_retire() and so we know that rq->hwsp may become invalid
> > only after the request has been signaled. Therefore if the request is
> > not yet signaled when we acquire rq->hwsp under the RCU, we know that
> > rq->hwsp will remain valid for the duration of the RCU read lock.
> > 
> > This is a very small window that may lead to either considering the
> > request not completed (causing a delay until the request is checked
> > again, any wait for the request is not affected) or dereferencing an
> > invalid pointer.
> > 
> > Fixes: 3adac4689f58 ("drm/i915: Introduce concept of per-timeline (context) HWSP")
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Cc: <stable@vger.kernel.org> # v5.1+
> > ---
> >   drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 11 ++----
> >   drivers/gpu/drm/i915/gt/intel_timeline.c    | 10 +++---
> >   drivers/gpu/drm/i915/i915_request.h         | 37 ++++++++++++++++++---
> >   3 files changed, 39 insertions(+), 19 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> > index 3c62fd6daa76..f96cd7d9b419 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> > @@ -134,11 +134,6 @@ static bool remove_signaling_context(struct intel_breadcrumbs *b,
> >       return true;
> >   }
> >   
> > -static inline bool __request_completed(const struct i915_request *rq)
> > -{
> > -     return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno);
> > -}
> > -
> >   __maybe_unused static bool
> >   check_signal_order(struct intel_context *ce, struct i915_request *rq)
> >   {
> > @@ -245,7 +240,7 @@ static void signal_irq_work(struct irq_work *work)
> >               list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
> >                       bool release;
> >   
> > -                     if (!__request_completed(rq))
> > +                     if (!__i915_request_is_complete(rq))
> >                               break;
> >   
> >                       if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
> > @@ -380,7 +375,7 @@ static void insert_breadcrumb(struct i915_request *rq)
> >        * straight onto a signaled list, and queue the irq worker for
> >        * its signal completion.
> >        */
> > -     if (__request_completed(rq)) {
> > +     if (__i915_request_is_complete(rq)) {
> >               irq_signal_request(rq, b);
> >               return;
> >       }
> > @@ -468,7 +463,7 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
> >       if (release)
> >               intel_context_put(ce);
> >   
> > -     if (__request_completed(rq))
> > +     if (__i915_request_is_complete(rq))
> >               irq_signal_request(rq, b);
> >   
> >       i915_request_put(rq);
> > diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
> > index 512afacd2bdc..a005d0165bf4 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_timeline.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
> > @@ -126,6 +126,10 @@ static void __rcu_cacheline_free(struct rcu_head *rcu)
> >       struct intel_timeline_cacheline *cl =
> >               container_of(rcu, typeof(*cl), rcu);
> >   
> > +     /* Must wait until after all *rq->hwsp are complete before removing */
> > +     i915_gem_object_unpin_map(cl->hwsp->vma->obj);
> > +     __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
> > +
> >       i915_active_fini(&cl->active);
> >       kfree(cl);
> >   }
> > @@ -133,11 +137,6 @@ static void __rcu_cacheline_free(struct rcu_head *rcu)
> >   static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
> >   {
> >       GEM_BUG_ON(!i915_active_is_idle(&cl->active));
> > -
> > -     i915_gem_object_unpin_map(cl->hwsp->vma->obj);
> > -     i915_vma_put(cl->hwsp->vma);
> > -     __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
> > -
> >       call_rcu(&cl->rcu, __rcu_cacheline_free);
> >   }
> >   
> > @@ -179,7 +178,6 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
> >               return ERR_CAST(vaddr);
> >       }
> >   
> > -     i915_vma_get(hwsp->vma);
> >       cl->hwsp = hwsp;
> >       cl->vaddr = page_pack_bits(vaddr, cacheline);
> >   
> > diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> > index 92e4320c50c4..7c4453e60323 100644
> > --- a/drivers/gpu/drm/i915/i915_request.h
> > +++ b/drivers/gpu/drm/i915/i915_request.h
> > @@ -440,7 +440,7 @@ static inline u32 hwsp_seqno(const struct i915_request *rq)
> >   
> >   static inline bool __i915_request_has_started(const struct i915_request *rq)
> >   {
> > -     return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1);
> > +     return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno - 1);
> >   }
> >   
> >   /**
> > @@ -471,11 +471,19 @@ static inline bool __i915_request_has_started(const struct i915_request *rq)
> >    */
> >   static inline bool i915_request_started(const struct i915_request *rq)
> >   {
> > +     bool result;
> > +
> >       if (i915_request_signaled(rq))
> >               return true;
> >   
> > -     /* Remember: started but may have since been preempted! */
> > -     return __i915_request_has_started(rq);
> > +     result = true;
> > +     rcu_read_lock(); /* the HWSP may be freed at runtime */
> > +     if (likely(!i915_request_signaled(rq)))
> > +             /* Remember: started but may have since been preempted! */
> > +             result = __i915_request_has_started(rq);
> > +     rcu_read_unlock();
> > +
> > +     return result;
> >   }
> >   
> >   /**
> > @@ -488,10 +496,16 @@ static inline bool i915_request_started(const struct i915_request *rq)
> >    */
> >   static inline bool i915_request_is_running(const struct i915_request *rq)
> >   {
> > +     bool result;
> > +
> >       if (!i915_request_is_active(rq))
> >               return false;
> >   
> > -     return __i915_request_has_started(rq);
> > +     rcu_read_lock();
> > +     result = __i915_request_has_started(rq) && i915_request_is_active(rq);
> > +     rcu_read_unlock();
> > +
> > +     return result;
> >   }
> >   
> >   /**
> > @@ -515,12 +529,25 @@ static inline bool i915_request_is_ready(const struct i915_request *rq)
> >       return !list_empty(&rq->sched.link);
> >   }
> >   
> > +static inline bool __i915_request_is_complete(const struct i915_request *rq)
> > +{
> > +     return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno);
> > +}
> > +
> >   static inline bool i915_request_completed(const struct i915_request *rq)
> >   {
> > +     bool result;
> > +
> >       if (i915_request_signaled(rq))
> >               return true;
> >   
> > -     return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno);
> > +     result = true;
> > +     rcu_read_lock(); /* the HWSP may be freed at runtime */
> > +     if (likely(!i915_request_signaled(rq)))
> > +             result = __i915_request_is_complete(rq);
> > +     rcu_read_unlock();
> > +
> > +     return result;
> >   }
> >   
> >   static inline void i915_request_mark_complete(struct i915_request *rq)
> > 
> 
> Should rq->hwsp_seqno be marked as rcu pointer?

50:50; we're careful enough with the helper, but we don't always call it
from under an RCU read lock. Hmm, that used to be true, but now the
interrupt handler runs in RCU context. That might not be a huge task to
annotate now, but there are still a few places that know the request->hwsp
is valid since they are before request submit.
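
As a rough sketch of what such an annotation might look like (an
assumption sketched for discussion, not part of the patch; the struct and
helper names are hypothetical, only the __rcu / rcu_dereference machinery
is the real API):

#include <linux/compiler.h>
#include <linux/rcupdate.h>
#include <linux/types.h>

/* hypothetical sketch of an __rcu-annotated seqno pointer */
struct example_request_rcu {
	/* ... */
	const u32 __rcu *hwsp_seqno;
};

/* readers that run under rcu_read_lock(), e.g. the completion checks */
static inline u32 example_hwsp_seqno(const struct example_request_rcu *rq)
{
	const u32 *hwsp = rcu_dereference(rq->hwsp_seqno);

	return READ_ONCE(*hwsp);
}

/*
 * Paths that know the map cannot be unpinned yet (e.g. before the request
 * has been submitted) would use rcu_dereference_protected() with a
 * suitable lockdep condition instead of holding the RCU read lock.
 */
static inline u32 example_hwsp_seqno_prelocked(const struct example_request_rcu *rq)
{
	return READ_ONCE(*rcu_dereference_protected(rq->hwsp_seqno, true));
}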

> We reset the fence signaled status before re-assigning the timeline. So 
> if we were to query a request in the process of being allocated, do we 
> need to do something extra to make sure !signaled status is not seen 
> before the rq->hwsp_seqno is replaced? Like should the order of re-init 
> be inverted?

You're thinking of request reuse. The question is where do we look at
i915_request_completed outside of an explicit ref. Off the top of my head
that is only gem_busy. (Similar thought exercise for plain
i915_request_is_signaled, just less pointer dancing.) gem_busy is
protected by the seqlock, that is, if the dma-resv is changed and the
fence freed, we restart the inspection.
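
For reference, a generic sketch of that read/retry pattern; the object
layout, the seqcount name and the inspection helper are placeholders
rather than the actual gem_busy code:

#include <linux/rcupdate.h>
#include <linux/seqlock.h>

struct example_object {
	seqcount_t resv_seq;	/* stand-in for the dma-resv seqcount */
	/* ... fence pointers, valid to inspect under RCU ... */
};

/* placeholder: report which engines have busy fences on the object */
static unsigned int example_inspect_fences(struct example_object *obj)
{
	return 0;
}

static unsigned int example_busy(struct example_object *obj)
{
	unsigned int seq, busy;

	rcu_read_lock();
	do {
		seq = read_seqcount_begin(&obj->resv_seq);
		/* may race with fences being replaced and freed ... */
		busy = example_inspect_fences(obj);
		/* ... so restart whenever the seqcount moved underneath us */
	} while (read_seqcount_retry(&obj->resv_seq, seq));
	rcu_read_unlock();

	return busy;
}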

So... I don't think we have a use where the order matters across
recycling, and so nothing to define the rules.
-Chris

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [Intel-gfx] ✓ Fi.CI.IGT: success for drm/i915: Check for rq->hwsp validity after acquiring RCU lock (rev2)
  2020-12-18  9:19 ` [Intel-gfx] " Chris Wilson
                   ` (6 preceding siblings ...)
  (?)
@ 2020-12-18 16:25 ` Patchwork
  -1 siblings, 0 replies; 17+ messages in thread
From: Patchwork @ 2020-12-18 16:25 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx


== Series Details ==

Series: drm/i915: Check for rq->hwsp validity after acquiring RCU lock (rev2)
URL   : https://patchwork.freedesktop.org/series/85071/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_9503_full -> Patchwork_19178_full
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_19178_full:

### IGT changes ###

#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * {igt@gem_vm_create@destroy-race}:
    - shard-tglb:         [PASS][1] -> [INCOMPLETE][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/shard-tglb1/igt@gem_vm_create@destroy-race.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-tglb1/igt@gem_vm_create@destroy-race.html

  
Known issues
------------

  Here are the changes found in Patchwork_19178_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_ctx_persistence@legacy-engines-cleanup:
    - shard-hsw:          NOTRUN -> [SKIP][3] ([fdo#109271] / [i915#1099])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-hsw5/igt@gem_ctx_persistence@legacy-engines-cleanup.html

  * igt@gem_exec_reloc@basic-wide-active@vcs1:
    - shard-iclb:         NOTRUN -> [FAIL][4] ([i915#2389])
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-iclb4/igt@gem_exec_reloc@basic-wide-active@vcs1.html

  * igt@gem_userptr_blits@huge-split:
    - shard-skl:          [PASS][5] -> [INCOMPLETE][6] ([i915#2502])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/shard-skl3/igt@gem_userptr_blits@huge-split.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-skl10/igt@gem_userptr_blits@huge-split.html

  * igt@gem_workarounds@suspend-resume:
    - shard-skl:          [PASS][7] -> [INCOMPLETE][8] ([i915#198]) +1 similar issue
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/shard-skl8/igt@gem_workarounds@suspend-resume.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-skl2/igt@gem_workarounds@suspend-resume.html

  * igt@gem_workarounds@suspend-resume-fd:
    - shard-skl:          [PASS][9] -> [INCOMPLETE][10] ([i915#198] / [i915#2405])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/shard-skl2/igt@gem_workarounds@suspend-resume-fd.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-skl5/igt@gem_workarounds@suspend-resume-fd.html

  * igt@gen9_exec_parse@allowed-all:
    - shard-glk:          [PASS][11] -> [DMESG-WARN][12] ([i915#1436] / [i915#716])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/shard-glk9/igt@gen9_exec_parse@allowed-all.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-glk3/igt@gen9_exec_parse@allowed-all.html

  * igt@i915_pm_rc6_residency@rc6-fence:
    - shard-hsw:          [PASS][13] -> [WARN][14] ([i915#1519])
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/shard-hsw4/igt@i915_pm_rc6_residency@rc6-fence.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-hsw6/igt@i915_pm_rc6_residency@rc6-fence.html

  * igt@kms_big_fb@yf-tiled-8bpp-rotate-270:
    - shard-glk:          NOTRUN -> [SKIP][15] ([fdo#109271]) +17 similar issues
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-glk6/igt@kms_big_fb@yf-tiled-8bpp-rotate-270.html

  * igt@kms_chamelium@hdmi-edid-change-during-suspend:
    - shard-apl:          NOTRUN -> [SKIP][16] ([fdo#109271] / [fdo#111827]) +3 similar issues
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-apl2/igt@kms_chamelium@hdmi-edid-change-during-suspend.html

  * igt@kms_chamelium@hdmi-hpd-for-each-pipe:
    - shard-hsw:          NOTRUN -> [SKIP][17] ([fdo#109271] / [fdo#111827]) +2 similar issues
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-hsw5/igt@kms_chamelium@hdmi-hpd-for-each-pipe.html

  * igt@kms_chamelium@hdmi-mode-timings:
    - shard-glk:          NOTRUN -> [SKIP][18] ([fdo#109271] / [fdo#111827]) +3 similar issues
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-glk6/igt@kms_chamelium@hdmi-mode-timings.html

  * igt@kms_color@pipe-b-ctm-0-25:
    - shard-skl:          [PASS][19] -> [DMESG-WARN][20] ([i915#1982])
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/shard-skl9/igt@kms_color@pipe-b-ctm-0-25.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-skl9/igt@kms_color@pipe-b-ctm-0-25.html

  * igt@kms_cursor_crc@pipe-c-cursor-128x42-sliding:
    - shard-skl:          [PASS][21] -> [FAIL][22] ([i915#54])
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/shard-skl9/igt@kms_cursor_crc@pipe-c-cursor-128x42-sliding.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-skl3/igt@kms_cursor_crc@pipe-c-cursor-128x42-sliding.html

  * igt@kms_cursor_crc@pipe-c-cursor-64x21-random:
    - shard-skl:          NOTRUN -> [FAIL][23] ([i915#54])
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-skl8/igt@kms_cursor_crc@pipe-c-cursor-64x21-random.html

  * igt@kms_cursor_crc@pipe-d-cursor-512x170-onscreen:
    - shard-skl:          NOTRUN -> [SKIP][24] ([fdo#109271]) +6 similar issues
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-skl8/igt@kms_cursor_crc@pipe-d-cursor-512x170-onscreen.html

  * igt@kms_cursor_legacy@pipe-d-torture-bo:
    - shard-apl:          NOTRUN -> [SKIP][25] ([fdo#109271] / [i915#533])
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-apl2/igt@kms_cursor_legacy@pipe-d-torture-bo.html

  * igt@kms_frontbuffer_tracking@psr-1p-primscrn-shrfb-pgflip-blt:
    - shard-apl:          NOTRUN -> [SKIP][26] ([fdo#109271]) +19 similar issues
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-apl2/igt@kms_frontbuffer_tracking@psr-1p-primscrn-shrfb-pgflip-blt.html

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a:
    - shard-kbl:          [PASS][27] -> [DMESG-WARN][28] ([i915#180])
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/shard-kbl2/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-kbl7/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-a.html

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-d:
    - shard-glk:          NOTRUN -> [SKIP][29] ([fdo#109271] / [i915#533]) +1 similar issue
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-glk6/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-d.html

  * igt@kms_plane_alpha_blend@pipe-a-alpha-transparent-fb:
    - shard-apl:          NOTRUN -> [FAIL][30] ([i915#265])
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-apl2/igt@kms_plane_alpha_blend@pipe-a-alpha-transparent-fb.html

  * igt@kms_plane_alpha_blend@pipe-c-constant-alpha-max:
    - shard-skl:          NOTRUN -> [FAIL][31] ([fdo#108145] / [i915#265])
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-skl6/igt@kms_plane_alpha_blend@pipe-c-constant-alpha-max.html

  * igt@kms_plane_alpha_blend@pipe-c-coverage-7efc:
    - shard-skl:          [PASS][32] -> [FAIL][33] ([fdo#108145] / [i915#265])
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/shard-skl5/igt@kms_plane_alpha_blend@pipe-c-coverage-7efc.html
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-skl6/igt@kms_plane_alpha_blend@pipe-c-coverage-7efc.html

  * igt@kms_properties@connector-properties-legacy:
    - shard-kbl:          [PASS][34] -> [DMESG-WARN][35] ([i915#165] / [i915#180] / [i915#78])
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/shard-kbl4/igt@kms_properties@connector-properties-legacy.html
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-kbl2/igt@kms_properties@connector-properties-legacy.html

  * igt@kms_vblank@pipe-a-ts-continuation-suspend:
    - shard-skl:          [PASS][36] -> [INCOMPLETE][37] ([i915#198] / [i915#2295])
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/shard-skl1/igt@kms_vblank@pipe-a-ts-continuation-suspend.html
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-skl1/igt@kms_vblank@pipe-a-ts-continuation-suspend.html

  * igt@prime_nv_api@i915_nv_import_twice:
    - shard-hsw:          NOTRUN -> [SKIP][38] ([fdo#109271]) +45 similar issues
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-hsw5/igt@prime_nv_api@i915_nv_import_twice.html

  
#### Possible fixes ####

  * {igt@gem_exec_schedule@u-fairslice@rcs0}:
    - shard-apl:          [DMESG-WARN][39] ([i915#1610]) -> [PASS][40]
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/shard-apl4/igt@gem_exec_schedule@u-fairslice@rcs0.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-apl2/igt@gem_exec_schedule@u-fairslice@rcs0.html

  * igt@gem_exec_whisper@basic-queues-priority-all:
    - shard-glk:          [DMESG-WARN][41] ([i915#118] / [i915#95]) -> [PASS][42]
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/shard-glk6/igt@gem_exec_whisper@basic-queues-priority-all.html
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-glk7/igt@gem_exec_whisper@basic-queues-priority-all.html

  * igt@gen9_exec_parse@allowed-single:
    - shard-glk:          [DMESG-WARN][43] ([i915#1436] / [i915#716]) -> [PASS][44]
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/shard-glk9/igt@gen9_exec_parse@allowed-single.html
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-glk6/igt@gen9_exec_parse@allowed-single.html

  * igt@i915_pm_dc@dc6-psr:
    - shard-iclb:         [FAIL][45] ([i915#454]) -> [PASS][46]
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/shard-iclb6/igt@i915_pm_dc@dc6-psr.html
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-iclb1/igt@i915_pm_dc@dc6-psr.html

  * igt@i915_pm_rpm@modeset-non-lpsp-stress:
    - shard-kbl:          [DMESG-WARN][47] ([i915#165] / [i915#180]) -> [PASS][48]
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/shard-kbl2/igt@i915_pm_rpm@modeset-non-lpsp-stress.html
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-kbl7/igt@i915_pm_rpm@modeset-non-lpsp-stress.html

  * igt@kms_cursor_crc@pipe-c-cursor-128x128-sliding:
    - shard-skl:          [FAIL][49] ([i915#54]) -> [PASS][50] +1 similar issue
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/shard-skl8/igt@kms_cursor_crc@pipe-c-cursor-128x128-sliding.html
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-skl2/igt@kms_cursor_crc@pipe-c-cursor-128x128-sliding.html

  * igt@kms_flip@plain-flip-ts-check@a-edp1:
    - shard-skl:          [FAIL][51] ([i915#2122]) -> [PASS][52]
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/shard-skl3/igt@kms_flip@plain-flip-ts-check@a-edp1.html
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-skl10/igt@kms_flip@plain-flip-ts-check@a-edp1.html

  
#### Warnings ####

  * igt@runner@aborted:
    - shard-kbl:          ([FAIL][53], [FAIL][54]) ([i915#2295] / [i915#483]) -> ([FAIL][55], [FAIL][56], [FAIL][57]) ([i915#1814] / [i915#2295] / [i915#483])
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/shard-kbl6/igt@runner@aborted.html
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/shard-kbl6/igt@runner@aborted.html
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-kbl1/igt@runner@aborted.html
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-kbl1/igt@runner@aborted.html
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-kbl7/igt@runner@aborted.html
    - shard-apl:          ([FAIL][58], [FAIL][59], [FAIL][60]) ([i915#1610] / [i915#2295] / [i915#2426]) -> ([FAIL][61], [FAIL][62]) ([i915#2295])
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/shard-apl4/igt@runner@aborted.html
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/shard-apl4/igt@runner@aborted.html
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/shard-apl8/igt@runner@aborted.html
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-apl8/igt@runner@aborted.html
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-apl8/igt@runner@aborted.html
    - shard-skl:          ([FAIL][63], [FAIL][64]) ([i915#2295]) -> ([FAIL][65], [FAIL][66], [FAIL][67]) ([i915#2295] / [i915#483])
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/shard-skl2/igt@runner@aborted.html
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_9503/shard-skl10/igt@runner@aborted.html
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-skl10/igt@runner@aborted.html
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-skl7/igt@runner@aborted.html
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/shard-skl7/igt@runner@aborted.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#111827]: https://bugs.freedesktop.org/show_bug.cgi?id=111827
  [i915#1099]: https://gitlab.freedesktop.org/drm/intel/issues/1099
  [i915#118]: https://gitlab.freedesktop.org/drm/intel/issues/118
  [i915#1436]: https://gitlab.freedesktop.org/drm/intel/issues/1436
  [i915#1519]: https://gitlab.freedesktop.org/drm/intel/issues/1519
  [i915#1610]: https://gitlab.freedesktop.org/drm/intel/issues/1610
  [i915#165]: https://gitlab.freedesktop.org/drm/intel/issues/165
  [i915#180]: https://gitlab.freedesktop.org/drm/intel/issues/180
  [i915#1814]: https://gitlab.freedesktop.org/drm/intel/issues/1814
  [i915#198]: https://gitlab.freedesktop.org/drm/intel/issues/198
  [i915#1982]: https://gitlab.freedesktop.org/drm/intel/issues/1982
  [i915#2122]: https://gitlab.freedesktop.org/drm/intel/issues/2122
  [i915#2295]: https://gitlab.freedesktop.org/drm/intel/issues/2295
  [i915#2389]: https://gitlab.freedesktop.org/drm/intel/issues/2389
  [i915#2405]: https://gitlab.freedesktop.org/drm/intel/issues/2405
  [i915#2426]: https://gitlab.freedesktop.org/drm/intel/issues/2426
  [i915#2502]: https://gitlab.freedesktop.org/drm/intel/issues/2502
  [i915#265]: https://gitlab.freedesktop.org/drm/intel/issues/265
  [i915#2802]: https://gitlab.freedesktop.org/drm/intel/issues/2802
  [i915#454]: https://gitlab.freedesktop.org/drm/intel/issues/454
  [i915#483]: https://gitlab.freedesktop.org/drm/intel/issues/483
  [i915#533]: https://gitlab.freedesktop.org/drm/intel/issues/533
  [i915#54]: https://gitlab.freedesktop.org/drm/intel/issues/54
  [i915#716]: https://gitlab.freedesktop.org/drm/intel/issues/716
  [i915#78]: https://gitlab.freedesktop.org/drm/intel/issues/78
  [i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95


Participating hosts (10 -> 10)
------------------------------

  No changes in participating hosts


Build changes
-------------

  * Linux: CI_DRM_9503 -> Patchwork_19178

  CI-20190529: 20190529
  CI_DRM_9503: 82c5c0ad8d578504865837b2135b60dd2d0054a1 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5909: 3d6caf71a3e988cd125eb9efdd0a7cdcd0451673 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_19178: a93ba5a18bf8acda650027601343a1bdc52b84ec @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_19178/index.html


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Intel-gfx] [PATCH v2] drm/i915: Check for rq->hwsp validity after acquiring RCU lock
  2020-12-18 16:07       ` Chris Wilson
@ 2020-12-18 16:32         ` Tvrtko Ursulin
  -1 siblings, 0 replies; 17+ messages in thread
From: Tvrtko Ursulin @ 2020-12-18 16:32 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: stable


On 18/12/2020 16:07, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2020-12-18 15:52:05)
>>
>> On 18/12/2020 12:24, Chris Wilson wrote:
>>> Since we allow removing the timeline map at runtime, there is a risk
>>> that rq->hwsp points into a stale page. To control that risk, we hold
>>> the RCU read lock while reading *rq->hwsp, but we missed a couple of
>>> important barriers. First, the unpinning / removal of the timeline map
>>> must be after all RCU readers into that map are complete, i.e. after an
>>> rcu barrier (in this case courtesy of call_rcu()). Secondly, we must
>>> make sure that the rq->hwsp we are about to dereference under the RCU
>>> lock is valid. In this case, we make the rq->hwsp pointer safe during
>>> i915_request_retire() and so we know that rq->hwsp may become invalid
>>> only after the request has been signaled. Therefore if the request is
>>> not yet signaled when we acquire rq->hwsp under the RCU, we know that
>>> rq->hwsp will remain valid for the duration of the RCU read lock.
>>>
>>> This is a very small window that may lead to either considering the
>>> request not completed (causing a delay until the request is checked
>>> again, any wait for the request is not affected) or dereferencing an
>>> invalid pointer.
>>>
>>> Fixes: 3adac4689f58 ("drm/i915: Introduce concept of per-timeline (context) HWSP")
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> Cc: <stable@vger.kernel.org> # v5.1+
>>> ---
>>>    drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 11 ++----
>>>    drivers/gpu/drm/i915/gt/intel_timeline.c    | 10 +++---
>>>    drivers/gpu/drm/i915/i915_request.h         | 37 ++++++++++++++++++---
>>>    3 files changed, 39 insertions(+), 19 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
>>> index 3c62fd6daa76..f96cd7d9b419 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
>>> +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
>>> @@ -134,11 +134,6 @@ static bool remove_signaling_context(struct intel_breadcrumbs *b,
>>>        return true;
>>>    }
>>>    
>>> -static inline bool __request_completed(const struct i915_request *rq)
>>> -{
>>> -     return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno);
>>> -}
>>> -
>>>    __maybe_unused static bool
>>>    check_signal_order(struct intel_context *ce, struct i915_request *rq)
>>>    {
>>> @@ -245,7 +240,7 @@ static void signal_irq_work(struct irq_work *work)
>>>                list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
>>>                        bool release;
>>>    
>>> -                     if (!__request_completed(rq))
>>> +                     if (!__i915_request_is_complete(rq))
>>>                                break;
>>>    
>>>                        if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
>>> @@ -380,7 +375,7 @@ static void insert_breadcrumb(struct i915_request *rq)
>>>         * straight onto a signaled list, and queue the irq worker for
>>>         * its signal completion.
>>>         */
>>> -     if (__request_completed(rq)) {
>>> +     if (__i915_request_is_complete(rq)) {
>>>                irq_signal_request(rq, b);
>>>                return;
>>>        }
>>> @@ -468,7 +463,7 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
>>>        if (release)
>>>                intel_context_put(ce);
>>>    
>>> -     if (__request_completed(rq))
>>> +     if (__i915_request_is_complete(rq))
>>>                irq_signal_request(rq, b);
>>>    
>>>        i915_request_put(rq);
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
>>> index 512afacd2bdc..a005d0165bf4 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_timeline.c
>>> +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
>>> @@ -126,6 +126,10 @@ static void __rcu_cacheline_free(struct rcu_head *rcu)
>>>        struct intel_timeline_cacheline *cl =
>>>                container_of(rcu, typeof(*cl), rcu);
>>>    
>>> +     /* Must wait until after all *rq->hwsp are complete before removing */
>>> +     i915_gem_object_unpin_map(cl->hwsp->vma->obj);
>>> +     __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
>>> +
>>>        i915_active_fini(&cl->active);
>>>        kfree(cl);
>>>    }
>>> @@ -133,11 +137,6 @@ static void __rcu_cacheline_free(struct rcu_head *rcu)
>>>    static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
>>>    {
>>>        GEM_BUG_ON(!i915_active_is_idle(&cl->active));
>>> -
>>> -     i915_gem_object_unpin_map(cl->hwsp->vma->obj);
>>> -     i915_vma_put(cl->hwsp->vma);
>>> -     __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
>>> -
>>>        call_rcu(&cl->rcu, __rcu_cacheline_free);
>>>    }
>>>    
>>> @@ -179,7 +178,6 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
>>>                return ERR_CAST(vaddr);
>>>        }
>>>    
>>> -     i915_vma_get(hwsp->vma);
>>>        cl->hwsp = hwsp;
>>>        cl->vaddr = page_pack_bits(vaddr, cacheline);
>>>    
>>> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
>>> index 92e4320c50c4..7c4453e60323 100644
>>> --- a/drivers/gpu/drm/i915/i915_request.h
>>> +++ b/drivers/gpu/drm/i915/i915_request.h
>>> @@ -440,7 +440,7 @@ static inline u32 hwsp_seqno(const struct i915_request *rq)
>>>    
>>>    static inline bool __i915_request_has_started(const struct i915_request *rq)
>>>    {
>>> -     return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1);
>>> +     return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno - 1);
>>>    }
>>>    
>>>    /**
>>> @@ -471,11 +471,19 @@ static inline bool __i915_request_has_started(const struct i915_request *rq)
>>>     */
>>>    static inline bool i915_request_started(const struct i915_request *rq)
>>>    {
>>> +     bool result;
>>> +
>>>        if (i915_request_signaled(rq))
>>>                return true;
>>>    
>>> -     /* Remember: started but may have since been preempted! */
>>> -     return __i915_request_has_started(rq);
>>> +     result = true;
>>> +     rcu_read_lock(); /* the HWSP may be freed at runtime */
>>> +     if (likely(!i915_request_signaled(rq)))
>>> +             /* Remember: started but may have since been preempted! */
>>> +             result = __i915_request_has_started(rq);
>>> +     rcu_read_unlock();
>>> +
>>> +     return result;
>>>    }
>>>    
>>>    /**
>>> @@ -488,10 +496,16 @@ static inline bool i915_request_started(const struct i915_request *rq)
>>>     */
>>>    static inline bool i915_request_is_running(const struct i915_request *rq)
>>>    {
>>> +     bool result;
>>> +
>>>        if (!i915_request_is_active(rq))
>>>                return false;
>>>    
>>> -     return __i915_request_has_started(rq);
>>> +     rcu_read_lock();
>>> +     result = __i915_request_has_started(rq) && i915_request_is_active(rq);
>>> +     rcu_read_unlock();
>>> +
>>> +     return result;
>>>    }
>>>    
>>>    /**
>>> @@ -515,12 +529,25 @@ static inline bool i915_request_is_ready(const struct i915_request *rq)
>>>        return !list_empty(&rq->sched.link);
>>>    }
>>>    
>>> +static inline bool __i915_request_is_complete(const struct i915_request *rq)
>>> +{
>>> +     return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno);
>>> +}
>>> +
>>>    static inline bool i915_request_completed(const struct i915_request *rq)
>>>    {
>>> +     bool result;
>>> +
>>>        if (i915_request_signaled(rq))
>>>                return true;
>>>    
>>> -     return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno);
>>> +     result = true;
>>> +     rcu_read_lock(); /* the HWSP may be freed at runtime */
>>> +     if (likely(!i915_request_signaled(rq)))
>>> +             result = __i915_request_is_complete(rq);
>>> +     rcu_read_unlock();
>>> +
>>> +     return result;
>>>    }
>>>    
>>>    static inline void i915_request_mark_complete(struct i915_request *rq)
>>>
>>
>> Should rq->hwsp_seqno be marked as rcu pointer?
> 
> 50:50; we're careful enough with the helper, but we don't always call it
> from under a rcu read lock. Hmm, that used to be true but now the
> interrupt handler is under rcu context. That might not be a huge task to
> annotate now, but still a few places that know the request->hwsp is valid
> since they are before request submit.
> 
>> We reset the fence signaled status before re-assigning the timeline. So
>> if we were to query a request in the process of being allocated, do we
>> need to do something extra to make sure !signaled status is not seen
>> before the rq->hwsp_seqno is replaced? Like should the order of re-init
>> be inverted?
> 
> You're thinking of request reuse. The question is where do we look at
> i915_request_completed outside of an explicit ref. Off the top of my head
> that is only gem_busy. (Similar thought exercise for plain
> i915_request_is_signaled, just less pointer dancing.) gem_busy is
> protected by the seqlock, that is if the dma-resv is changed and the
> fence freed, we restart the inspection.
> 
> So... I don't think we have a use where the order matters across
> recycling, and so nothing to define the rules.

Okay, sounds believable.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Intel-gfx] [PATCH v2] drm/i915: Check for rq->hwsp validity after acquiring RCU lock
@ 2020-12-18 16:32         ` Tvrtko Ursulin
  0 siblings, 0 replies; 17+ messages in thread
From: Tvrtko Ursulin @ 2020-12-18 16:32 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: stable


On 18/12/2020 16:07, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2020-12-18 15:52:05)
>>
>> On 18/12/2020 12:24, Chris Wilson wrote:
>>> Since we allow removing the timeline map at runtime, there is a risk
>>> that rq->hwsp points into a stale page. To control that risk, we hold
>>> the RCU read lock while reading *rq->hwsp, but we missed a couple of
>>> important barriers. First, the unpinning / removal of the timeline map
>>> must be after all RCU readers into that map are complete, i.e. after an
>>> rcu barrier (in this case courtesy of call_rcu()). Secondly, we must
>>> make sure that the rq->hwsp we are about to dereference under the RCU
>>> lock is valid. In this case, we make the rq->hwsp pointer safe during
>>> i915_request_retire() and so we know that rq->hwsp may become invalid
>>> only after the request has been signaled. Therefore if the request is
>>> not yet signaled when we acquire rq->hwsp under the RCU, we know that
>>> rq->hwsp will remain valid for the duration of the RCU read lock.
>>>
>>> This is a very small window that may lead to either considering the
>>> request not completed (causing a delay until the request is checked
>>> again, any wait for the request is not affected) or dereferencing an
>>> invalid pointer.
>>>
>>> Fixes: 3adac4689f58 ("drm/i915: Introduce concept of per-timeline (context) HWSP")
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> Cc: <stable@vger.kernel.org> # v5.1+
>>> ---
>>>    drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 11 ++----
>>>    drivers/gpu/drm/i915/gt/intel_timeline.c    | 10 +++---
>>>    drivers/gpu/drm/i915/i915_request.h         | 37 ++++++++++++++++++---
>>>    3 files changed, 39 insertions(+), 19 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
>>> index 3c62fd6daa76..f96cd7d9b419 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
>>> +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
>>> @@ -134,11 +134,6 @@ static bool remove_signaling_context(struct intel_breadcrumbs *b,
>>>        return true;
>>>    }
>>>    
>>> -static inline bool __request_completed(const struct i915_request *rq)
>>> -{
>>> -     return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno);
>>> -}
>>> -
>>>    __maybe_unused static bool
>>>    check_signal_order(struct intel_context *ce, struct i915_request *rq)
>>>    {
>>> @@ -245,7 +240,7 @@ static void signal_irq_work(struct irq_work *work)
>>>                list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
>>>                        bool release;
>>>    
>>> -                     if (!__request_completed(rq))
>>> +                     if (!__i915_request_is_complete(rq))
>>>                                break;
>>>    
>>>                        if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
>>> @@ -380,7 +375,7 @@ static void insert_breadcrumb(struct i915_request *rq)
>>>         * straight onto a signaled list, and queue the irq worker for
>>>         * its signal completion.
>>>         */
>>> -     if (__request_completed(rq)) {
>>> +     if (__i915_request_is_complete(rq)) {
>>>                irq_signal_request(rq, b);
>>>                return;
>>>        }
>>> @@ -468,7 +463,7 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
>>>        if (release)
>>>                intel_context_put(ce);
>>>    
>>> -     if (__request_completed(rq))
>>> +     if (__i915_request_is_complete(rq))
>>>                irq_signal_request(rq, b);
>>>    
>>>        i915_request_put(rq);
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
>>> index 512afacd2bdc..a005d0165bf4 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_timeline.c
>>> +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
>>> @@ -126,6 +126,10 @@ static void __rcu_cacheline_free(struct rcu_head *rcu)
>>>        struct intel_timeline_cacheline *cl =
>>>                container_of(rcu, typeof(*cl), rcu);
>>>    
>>> +     /* Must wait until after all *rq->hwsp are complete before removing */
>>> +     i915_gem_object_unpin_map(cl->hwsp->vma->obj);
>>> +     __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
>>> +
>>>        i915_active_fini(&cl->active);
>>>        kfree(cl);
>>>    }
>>> @@ -133,11 +137,6 @@ static void __rcu_cacheline_free(struct rcu_head *rcu)
>>>    static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
>>>    {
>>>        GEM_BUG_ON(!i915_active_is_idle(&cl->active));
>>> -
>>> -     i915_gem_object_unpin_map(cl->hwsp->vma->obj);
>>> -     i915_vma_put(cl->hwsp->vma);
>>> -     __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
>>> -
>>>        call_rcu(&cl->rcu, __rcu_cacheline_free);
>>>    }
>>>    
>>> @@ -179,7 +178,6 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
>>>                return ERR_CAST(vaddr);
>>>        }
>>>    
>>> -     i915_vma_get(hwsp->vma);
>>>        cl->hwsp = hwsp;
>>>        cl->vaddr = page_pack_bits(vaddr, cacheline);
>>>    
>>> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
>>> index 92e4320c50c4..7c4453e60323 100644
>>> --- a/drivers/gpu/drm/i915/i915_request.h
>>> +++ b/drivers/gpu/drm/i915/i915_request.h
>>> @@ -440,7 +440,7 @@ static inline u32 hwsp_seqno(const struct i915_request *rq)
>>>    
>>>    static inline bool __i915_request_has_started(const struct i915_request *rq)
>>>    {
>>> -     return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1);
>>> +     return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno - 1);
>>>    }
>>>    
>>>    /**
>>> @@ -471,11 +471,19 @@ static inline bool __i915_request_has_started(const struct i915_request *rq)
>>>     */
>>>    static inline bool i915_request_started(const struct i915_request *rq)
>>>    {
>>> +     bool result;
>>> +
>>>        if (i915_request_signaled(rq))
>>>                return true;
>>>    
>>> -     /* Remember: started but may have since been preempted! */
>>> -     return __i915_request_has_started(rq);
>>> +     result = true;
>>> +     rcu_read_lock(); /* the HWSP may be freed at runtime */
>>> +     if (likely(!i915_request_signaled(rq)))
>>> +             /* Remember: started but may have since been preempted! */
>>> +             result = __i915_request_has_started(rq);
>>> +     rcu_read_unlock();
>>> +
>>> +     return result;
>>>    }
>>>    
>>>    /**
>>> @@ -488,10 +496,16 @@ static inline bool i915_request_started(const struct i915_request *rq)
>>>     */
>>>    static inline bool i915_request_is_running(const struct i915_request *rq)
>>>    {
>>> +     bool result;
>>> +
>>>        if (!i915_request_is_active(rq))
>>>                return false;
>>>    
>>> -     return __i915_request_has_started(rq);
>>> +     rcu_read_lock();
>>> +     result = __i915_request_has_started(rq) && i915_request_is_active(rq);
>>> +     rcu_read_unlock();
>>> +
>>> +     return result;
>>>    }
>>>    
>>>    /**
>>> @@ -515,12 +529,25 @@ static inline bool i915_request_is_ready(const struct i915_request *rq)
>>>        return !list_empty(&rq->sched.link);
>>>    }
>>>    
>>> +static inline bool __i915_request_is_complete(const struct i915_request *rq)
>>> +{
>>> +     return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno);
>>> +}
>>> +
>>>    static inline bool i915_request_completed(const struct i915_request *rq)
>>>    {
>>> +     bool result;
>>> +
>>>        if (i915_request_signaled(rq))
>>>                return true;
>>>    
>>> -     return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno);
>>> +     result = true;
>>> +     rcu_read_lock(); /* the HWSP may be freed at runtime */
>>> +     if (likely(!i915_request_signaled(rq)))
>>> +             result = __i915_request_is_complete(rq);
>>> +     rcu_read_unlock();
>>> +
>>> +     return result;
>>>    }
>>>    
>>>    static inline void i915_request_mark_complete(struct i915_request *rq)
>>>
>>
>> Should rq->hwsp_seqno be marked as rcu pointer?
> 
> 50:50; we're careful enough with the helper, but we don't always call it
> from under a rcu read lock. Hmm, that used to be true but now the
> interrupt handler is under rcu context. That might not be a huge task to
> annotate now, but still a few places that know the request->hwsp is valid
> since they are before request submit.
> 
>> We reset the fence signaled status before re-assigning the timeline. So
>> if we were to query a request in the process of being allocated, do we
>> need to do something extra to make sure !signaled status is not seen
>> before the rq->hwsp_seqno is replaced? Like should the order of re-init
>> be inverted?
> 
> You're thinking of request reuse. The question is where do we look at
> i915_request_completed outside of an explicit ref. Off the top of my head
> that is only gem_busy. (Similar thought exercise for plain
> i915_request_is_signaled, just less pointer dancing.) gem_busy is
> protected by the seqlock; that is, if the dma-resv is changed and the
> fence freed, we restart the inspection.
> 
> So... I don't think we have a use where the order matters across
> recycling, and so nothing to define the rules.
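
Purely as a thought experiment, if the re-init order across request reuse
ever did need defined rules, the usual publish-before-unsignal pattern would
look roughly as below; the types and helpers are hypothetical stand-ins for
the discussion above, not the driver code:

	#include <linux/dma-fence.h>

	/* Hypothetical request layout, only for this sketch. */
	struct sketch_request {
		struct dma_fence fence;
		const u32 *hwsp_seqno;
	};

	/* Re-init on reuse: publish the new seqno page before the fence can
	 * be observed as unsignaled again.
	 */
	static void sketch_reinit(struct sketch_request *rq, const u32 *new_hwsp)
	{
		WRITE_ONCE(rq->hwsp_seqno, new_hwsp);
		smp_wmb();	/* order the pointer update before clearing "signaled" */
		clear_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags);
	}

	/* Reader: if it observes "not signaled", it also observes the new page. */
	static bool sketch_completed(const struct sketch_request *rq)
	{
		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
			return true;

		smp_rmb();	/* pairs with the smp_wmb() in sketch_reinit() */
		return i915_seqno_passed(READ_ONCE(*READ_ONCE(rq->hwsp_seqno)),
					 rq->fence.seqno);
	}

As concluded above, nothing currently inspects a request across recycling
without other protection, so the driver does not need this; the sketch only
shows what the inverted re-init order asked about would buy.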

Okay, sounds believable.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
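
As a footnote to the __rcu annotation question at the top of this exchange, a
minimal sketch of what the annotated field and a checked reader could look
like; the layout and function names are hypothetical, and the point is only
to show where the churn would land:

	#include <linux/rcupdate.h>
	#include <linux/types.h>

	/* Hypothetical, heavily trimmed layout, only for this sketch. */
	struct sketch_request {
		const u32 __rcu *hwsp_seqno;	/* sparse now enforces the RCU rules */
	};

	/* Readers that can race with the timeline unpin use rcu_dereference(). */
	static u32 sketch_hwsp_seqno(const struct sketch_request *rq)
	{
		u32 seqno;

		rcu_read_lock();
		seqno = READ_ONCE(*rcu_dereference(rq->hwsp_seqno));
		rcu_read_unlock();

		return seqno;
	}

	/*
	 * Call sites that hold a stronger guarantee (e.g. the request has not
	 * been submitted yet) would instead have to spell that guarantee out:
	 *
	 *	hwsp = rcu_dereference_protected(rq->hwsp_seqno,
	 *					 <why it cannot go away>);
	 *
	 * and the writer would publish with rcu_assign_pointer(); that is the
	 * annotation churn referred to above.
	 */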

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2020-12-18 16:34 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-12-18  9:19 [PATCH] drm/i915: Check for rq->hwsp validity after acquiring RCU lock Chris Wilson
2020-12-18  9:19 ` [Intel-gfx] " Chris Wilson
2020-12-18 12:10 ` Chris Wilson
2020-12-18 12:10   ` [Intel-gfx] " Chris Wilson
2020-12-18 12:24 ` [PATCH v2] " Chris Wilson
2020-12-18 12:24   ` [Intel-gfx] " Chris Wilson
2020-12-18 15:52   ` Tvrtko Ursulin
2020-12-18 15:52     ` Tvrtko Ursulin
2020-12-18 16:07     ` Chris Wilson
2020-12-18 16:07       ` Chris Wilson
2020-12-18 16:32       ` Tvrtko Ursulin
2020-12-18 16:32         ` Tvrtko Ursulin
2020-12-18 12:29 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for " Patchwork
2020-12-18 12:48 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
2020-12-18 13:26 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/i915: Check for rq->hwsp validity after acquiring RCU lock (rev2) Patchwork
2020-12-18 13:56 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2020-12-18 16:25 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
