linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Sultan Alsawaf <sultan@kerneltoast.com>
To: unlisted-recipients:; (no To-header on input)
Cc: Sultan Alsawaf <sultan@kerneltoast.com>,
	stable@vger.kernel.org, Jani Nikula <jani.nikula@linux.intel.com>,
	Joonas Lahtinen <joonas.lahtinen@linux.intel.com>,
	Rodrigo Vivi <rodrigo.vivi@intel.com>,
	David Airlie <airlied@linux.ie>, Daniel Vetter <daniel@ffwll.ch>,
	Chris Wilson <chris@chris-wilson.co.uk>,
	Matthew Auld <matthew.auld@intel.com>,
	intel-gfx@lists.freedesktop.org, dri-devel@lists.freedesktop.org,
	linux-kernel@vger.kernel.org
Subject: [PATCH v4] drm/i915: Synchronize active and retire callbacks
Date: Mon,  6 Apr 2020 23:40:06 -0700	[thread overview]
Message-ID: <20200407064007.7599-1-sultan@kerneltoast.com> (raw)
In-Reply-To: <20200404024156.GA10382@sultan-box.localdomain>

From: Sultan Alsawaf <sultan@kerneltoast.com>

Active and retire callbacks can run simultaneously, causing panics and
mayhem. The most notable case is with the intel_context_pin/unpin race
that causes ring and page table corruption. In 5.4, this race is more
noticeable because intel_ring_unpin() sets ring->vaddr to NULL and
causes a clean NULL-pointer-dereference panic, but in newer kernels this
race goes unnoticed.

Here is an example of a crash caused by this race on 5.4:
BUG: unable to handle page fault for address: 0000000000003448
RIP: 0010:gen8_emit_flush_render+0x163/0x190
Call Trace:
 execlists_request_alloc+0x25/0x40
 __i915_request_create+0x1f4/0x2c0
 i915_request_create+0x71/0xc0
 i915_gem_do_execbuffer+0xb98/0x1a80
 ? preempt_count_add+0x68/0xa0
 ? _raw_spin_lock+0x13/0x30
 ? _raw_spin_unlock+0x16/0x30
 i915_gem_execbuffer2_ioctl+0x1de/0x3c0
 ? i915_gem_busy_ioctl+0x7f/0x1d0
 ? i915_gem_execbuffer_ioctl+0x2d0/0x2d0
 drm_ioctl_kernel+0xb2/0x100
 drm_ioctl+0x209/0x360
 ? i915_gem_execbuffer_ioctl+0x2d0/0x2d0
 ksys_ioctl+0x87/0xc0
 __x64_sys_ioctl+0x16/0x20
 do_syscall_64+0x4e/0x150
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

Protect the active and retire callbacks with their own lock to prevent
them from running at the same time as one another.

Fixes: 12c255b5dad1 ("drm/i915: Provide an i915_active.acquire callback")
Cc: <stable@vger.kernel.org>
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
---
 drivers/gpu/drm/i915/i915_active.c       | 52 ++++++++++++++++++++----
 drivers/gpu/drm/i915/i915_active.h       | 10 ++---
 drivers/gpu/drm/i915/i915_active_types.h |  2 +
 3 files changed, 50 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index b0a499753526..5802233f71ec 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -147,8 +147,22 @@ __active_retire(struct i915_active *ref)
 	spin_unlock_irqrestore(&ref->tree_lock, flags);
 
 	/* After the final retire, the entire struct may be freed */
-	if (ref->retire)
-		ref->retire(ref);
+	if (ref->retire) {
+		if (ref->active) {
+			bool freed = false;
+
+			/* Don't race with the active callback, and avoid UaF */
+			down_write(&ref->rwsem);
+			ref->freed = &freed;
+			ref->retire(ref);
+			if (!freed) {
+				ref->freed = NULL;
+				up_write(&ref->rwsem);
+			}
+		} else {
+			ref->retire(ref);
+		}
+	}
 
 	/* ... except if you wait on it, you must manage your own references! */
 	wake_up_var(ref);
@@ -278,7 +292,8 @@ void __i915_active_init(struct i915_active *ref,
 			int (*active)(struct i915_active *ref),
 			void (*retire)(struct i915_active *ref),
 			struct lock_class_key *mkey,
-			struct lock_class_key *wkey)
+			struct lock_class_key *wkey,
+			struct lock_class_key *rkey)
 {
 	unsigned long bits;
 
@@ -287,8 +302,13 @@ void __i915_active_init(struct i915_active *ref,
 	ref->flags = 0;
 	ref->active = active;
 	ref->retire = ptr_unpack_bits(retire, &bits, 2);
-	if (bits & I915_ACTIVE_MAY_SLEEP)
+	ref->freed = NULL;
+	if (ref->active && ref->retire) {
+		__init_rwsem(&ref->rwsem, "i915_active.rwsem", rkey);
 		ref->flags |= I915_ACTIVE_RETIRE_SLEEPS;
+	} else if (bits & I915_ACTIVE_MAY_SLEEP) {
+		ref->flags |= I915_ACTIVE_RETIRE_SLEEPS;
+	}
 
 	spin_lock_init(&ref->tree_lock);
 	ref->tree = RB_ROOT;
@@ -417,8 +437,20 @@ int i915_active_acquire(struct i915_active *ref)
 		return err;
 
 	if (likely(!i915_active_acquire_if_busy(ref))) {
-		if (ref->active)
-			err = ref->active(ref);
+		if (ref->active) {
+			if (ref->retire) {
+				/*
+				 * This can be a recursive call, and the mutex
+				 * above already protects from concurrent active
+				 * callbacks, so a read lock fits best.
+				 */
+				down_read(&ref->rwsem);
+				err = ref->active(ref);
+				up_read(&ref->rwsem);
+			} else {
+				err = ref->active(ref);
+			}
+		}
 		if (!err) {
 			spin_lock_irq(&ref->tree_lock); /* __active_retire() */
 			debug_active_activate(ref);
@@ -502,16 +534,20 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
 	return err;
 }
 
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
 void i915_active_fini(struct i915_active *ref)
 {
+	if (ref->freed) {
+		*ref->freed = true;
+		up_write(&ref->rwsem);
+	}
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
 	debug_active_fini(ref);
 	GEM_BUG_ON(atomic_read(&ref->count));
 	GEM_BUG_ON(work_pending(&ref->work));
 	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
 	mutex_destroy(&ref->mutex);
-}
 #endif
+}
 
 static inline bool is_idle_barrier(struct active_node *node, u64 idx)
 {
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 51e1e854ca55..b684b1fdcc02 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -153,14 +153,16 @@ void __i915_active_init(struct i915_active *ref,
 			int (*active)(struct i915_active *ref),
 			void (*retire)(struct i915_active *ref),
 			struct lock_class_key *mkey,
-			struct lock_class_key *wkey);
+			struct lock_class_key *wkey,
+			struct lock_class_key *rkey);
 
 /* Specialise each class of i915_active to avoid impossible lockdep cycles. */
 #define i915_active_init(ref, active, retire) do {		\
 	static struct lock_class_key __mkey;				\
 	static struct lock_class_key __wkey;				\
+	static struct lock_class_key __rkey;				\
 									\
-	__i915_active_init(ref, active, retire, &__mkey, &__wkey);	\
+	__i915_active_init(ref, active, retire, &__mkey, &__wkey, &__rkey);	\
 } while (0)
 
 int i915_active_ref(struct i915_active *ref,
@@ -200,11 +202,7 @@ i915_active_is_idle(const struct i915_active *ref)
 	return !atomic_read(&ref->count);
 }
 
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
 void i915_active_fini(struct i915_active *ref);
-#else
-static inline void i915_active_fini(struct i915_active *ref) { }
-#endif
 
 int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 					    struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
index 6360c3e4b765..aaee2548cb19 100644
--- a/drivers/gpu/drm/i915/i915_active_types.h
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -32,6 +32,8 @@ struct active_node;
 struct i915_active {
 	atomic_t count;
 	struct mutex mutex;
+	struct rw_semaphore rwsem;
+	bool *freed;
 
 	spinlock_t tree_lock;
 	struct active_node *cache;
-- 
2.26.0


  reply	other threads:[~2020-04-07  6:40 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-04-03  1:13 [PATCH] drm/i915: Fix use-after-free due to intel_context_pin/unpin race Sultan Alsawaf
2020-04-03  4:29 ` [PATCH v2] " Sultan Alsawaf
2020-04-03 22:35   ` [PATCH v3] drm/i915: Synchronize active and retire callbacks Sultan Alsawaf
2020-04-04  2:41     ` Sultan Alsawaf
2020-04-07  6:40       ` Sultan Alsawaf [this message]
2020-04-14  6:13         ` [PATCH v4] " Sultan Alsawaf
2020-04-14  8:23           ` Chris Wilson
2020-04-14 14:43             ` Sultan Alsawaf
2020-04-20  5:24               ` Sultan Alsawaf
2020-04-20  8:21                 ` Joonas Lahtinen
2020-04-20 16:15                   ` Sultan Alsawaf
2020-04-21  6:51                     ` Joonas Lahtinen
2020-04-21 15:54                       ` Sultan Alsawaf
2020-04-15  3:29 ` [drm/i915] 6dc0b234a6: BUG:sleeping_function_called_from_invalid_context_at_kernel/locking/mutex.c kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200407064007.7599-1-sultan@kerneltoast.com \
    --to=sultan@kerneltoast.com \
    --cc=airlied@linux.ie \
    --cc=chris@chris-wilson.co.uk \
    --cc=daniel@ffwll.ch \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=jani.nikula@linux.intel.com \
    --cc=joonas.lahtinen@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=matthew.auld@intel.com \
    --cc=rodrigo.vivi@intel.com \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).