All of lore.kernel.org
 help / color / mirror / Atom feed
From: Lyude Paul <lyude@redhat.com>
To: dri-devel@lists.freedesktop.org, amd-gfx@lists.freedesktop.org,
	nouveau@lists.freedesktop.org, intel-gfx@lists.freedesktop.org
Cc: "Juston Li" <juston.li@intel.com>,
	"Imre Deak" <imre.deak@intel.com>,
	"Ville Syrjälä" <ville.syrjala@linux.intel.com>,
	"Harry Wentland" <hwentlan@amd.com>,
	"Daniel Vetter" <daniel@ffwll.ch>, "Sean Paul" <sean@poorly.run>,
	"Maarten Lankhorst" <maarten.lankhorst@linux.intel.com>,
	"Maxime Ripard" <mripard@kernel.org>,
	"David Airlie" <airlied@linux.ie>,
	linux-kernel@vger.kernel.org
Subject: [PATCH v5 01/14] drm/dp_mst: Destroy MSTBs asynchronously
Date: Mon, 21 Oct 2019 22:35:56 -0400	[thread overview]
Message-ID: <20191022023641.8026-2-lyude@redhat.com> (raw)
In-Reply-To: <20191022023641.8026-1-lyude@redhat.com>

When reprobing an MST topology during resume, we have to account for the
fact that while we were suspended it's possible that mstbs may have been
removed from any ports in the topology. Since iterating downwards in the
topology requires that we hold &mgr->lock, destroying MSTBs from this
context would result in attempting to lock &mgr->lock a second time and
deadlocking.

So, fix this by first moving destruction of MSTBs into
destroy_connector_work, then rename destroy_connector_work and friends
to reflect that they now destroy both ports and mstbs.

Note that even though this means that MSTBs will still be accessible for
a short period of time between their removal from the topology and
delayed destruction, we are still protected against referencing a MSTB
with a refcount of 0 since we use kref_get_unless_zero() in most places.

Changes since v1:
* s/destroy_connector_list/destroy_port_list/
  s/connector_destroy_lock/delayed_destroy_lock/
  s/connector_destroy_work/delayed_destroy_work/
  s/drm_dp_finish_destroy_branch_device/drm_dp_delayed_destroy_mstb/
  s/drm_dp_finish_destroy_port/drm_dp_delayed_destroy_port/
  - danvet
* Use two loops in drm_dp_delayed_destroy_work() - danvet
* Better explain why we need to do this - danvet
* Use cancel_work_sync() instead of flush_work() - flush_work() doesn't
  account for work requeing

Cc: Juston Li <juston.li@intel.com>
Cc: Imre Deak <imre.deak@intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Harry Wentland <hwentlan@amd.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: Lyude Paul <lyude@redhat.com>
Reviewed-by: Sean Paul <sean@poorly.run>
---
 drivers/gpu/drm/drm_dp_mst_topology.c | 164 +++++++++++++++++---------
 include/drm/drm_dp_mst_helper.h       |  26 ++--
 2 files changed, 128 insertions(+), 62 deletions(-)

diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index 9cccc5e63309..66ff226d8c86 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -1398,34 +1398,17 @@ static void drm_dp_destroy_mst_branch_device(struct kref *kref)
 	struct drm_dp_mst_branch *mstb =
 		container_of(kref, struct drm_dp_mst_branch, topology_kref);
 	struct drm_dp_mst_topology_mgr *mgr = mstb->mgr;
-	struct drm_dp_mst_port *port, *tmp;
-	bool wake_tx = false;
 
-	mutex_lock(&mgr->lock);
-	list_for_each_entry_safe(port, tmp, &mstb->ports, next) {
-		list_del(&port->next);
-		drm_dp_mst_topology_put_port(port);
-	}
-	mutex_unlock(&mgr->lock);
-
-	/* drop any tx slots msg */
-	mutex_lock(&mstb->mgr->qlock);
-	if (mstb->tx_slots[0]) {
-		mstb->tx_slots[0]->state = DRM_DP_SIDEBAND_TX_TIMEOUT;
-		mstb->tx_slots[0] = NULL;
-		wake_tx = true;
-	}
-	if (mstb->tx_slots[1]) {
-		mstb->tx_slots[1]->state = DRM_DP_SIDEBAND_TX_TIMEOUT;
-		mstb->tx_slots[1] = NULL;
-		wake_tx = true;
-	}
-	mutex_unlock(&mstb->mgr->qlock);
+	INIT_LIST_HEAD(&mstb->destroy_next);
 
-	if (wake_tx)
-		wake_up_all(&mstb->mgr->tx_waitq);
-
-	drm_dp_mst_put_mstb_malloc(mstb);
+	/*
+	 * This can get called under mgr->mutex, so we need to perform the
+	 * actual destruction of the mstb in another worker
+	 */
+	mutex_lock(&mgr->delayed_destroy_lock);
+	list_add(&mstb->destroy_next, &mgr->destroy_branch_device_list);
+	mutex_unlock(&mgr->delayed_destroy_lock);
+	schedule_work(&mgr->delayed_destroy_work);
 }
 
 /**
@@ -1540,10 +1523,10 @@ static void drm_dp_destroy_port(struct kref *kref)
 			 * we might be holding the mode_config.mutex
 			 * from an EDID retrieval */
 
-			mutex_lock(&mgr->destroy_connector_lock);
-			list_add(&port->next, &mgr->destroy_connector_list);
-			mutex_unlock(&mgr->destroy_connector_lock);
-			schedule_work(&mgr->destroy_connector_work);
+			mutex_lock(&mgr->delayed_destroy_lock);
+			list_add(&port->next, &mgr->destroy_port_list);
+			mutex_unlock(&mgr->delayed_destroy_lock);
+			schedule_work(&mgr->delayed_destroy_work);
 			return;
 		}
 		/* no need to clean up vcpi
@@ -3085,7 +3068,7 @@ void drm_dp_mst_topology_mgr_suspend(struct drm_dp_mst_topology_mgr *mgr)
 			   DP_MST_EN | DP_UPSTREAM_IS_SRC);
 	mutex_unlock(&mgr->lock);
 	flush_work(&mgr->work);
-	flush_work(&mgr->destroy_connector_work);
+	flush_work(&mgr->delayed_destroy_work);
 }
 EXPORT_SYMBOL(drm_dp_mst_topology_mgr_suspend);
 
@@ -3995,34 +3978,104 @@ static void drm_dp_tx_work(struct work_struct *work)
 	mutex_unlock(&mgr->qlock);
 }
 
-static void drm_dp_destroy_connector_work(struct work_struct *work)
+static inline void
+drm_dp_delayed_destroy_port(struct drm_dp_mst_port *port)
 {
-	struct drm_dp_mst_topology_mgr *mgr = container_of(work, struct drm_dp_mst_topology_mgr, destroy_connector_work);
-	struct drm_dp_mst_port *port;
-	bool send_hotplug = false;
+	port->mgr->cbs->destroy_connector(port->mgr, port->connector);
+
+	drm_dp_port_teardown_pdt(port, port->pdt);
+	port->pdt = DP_PEER_DEVICE_NONE;
+
+	drm_dp_mst_put_port_malloc(port);
+}
+
+static inline void
+drm_dp_delayed_destroy_mstb(struct drm_dp_mst_branch *mstb)
+{
+	struct drm_dp_mst_topology_mgr *mgr = mstb->mgr;
+	struct drm_dp_mst_port *port, *tmp;
+	bool wake_tx = false;
+
+	mutex_lock(&mgr->lock);
+	list_for_each_entry_safe(port, tmp, &mstb->ports, next) {
+		list_del(&port->next);
+		drm_dp_mst_topology_put_port(port);
+	}
+	mutex_unlock(&mgr->lock);
+
+	/* drop any tx slots msg */
+	mutex_lock(&mstb->mgr->qlock);
+	if (mstb->tx_slots[0]) {
+		mstb->tx_slots[0]->state = DRM_DP_SIDEBAND_TX_TIMEOUT;
+		mstb->tx_slots[0] = NULL;
+		wake_tx = true;
+	}
+	if (mstb->tx_slots[1]) {
+		mstb->tx_slots[1]->state = DRM_DP_SIDEBAND_TX_TIMEOUT;
+		mstb->tx_slots[1] = NULL;
+		wake_tx = true;
+	}
+	mutex_unlock(&mstb->mgr->qlock);
+
+	if (wake_tx)
+		wake_up_all(&mstb->mgr->tx_waitq);
+
+	drm_dp_mst_put_mstb_malloc(mstb);
+}
+
+static void drm_dp_delayed_destroy_work(struct work_struct *work)
+{
+	struct drm_dp_mst_topology_mgr *mgr =
+		container_of(work, struct drm_dp_mst_topology_mgr,
+			     delayed_destroy_work);
+	bool send_hotplug = false, go_again;
+
 	/*
 	 * Not a regular list traverse as we have to drop the destroy
-	 * connector lock before destroying the connector, to avoid AB->BA
+	 * connector lock before destroying the mstb/port, to avoid AB->BA
 	 * ordering between this lock and the config mutex.
 	 */
-	for (;;) {
-		mutex_lock(&mgr->destroy_connector_lock);
-		port = list_first_entry_or_null(&mgr->destroy_connector_list, struct drm_dp_mst_port, next);
-		if (!port) {
-			mutex_unlock(&mgr->destroy_connector_lock);
-			break;
+	do {
+		go_again = false;
+
+		for (;;) {
+			struct drm_dp_mst_branch *mstb;
+
+			mutex_lock(&mgr->delayed_destroy_lock);
+			mstb = list_first_entry_or_null(&mgr->destroy_branch_device_list,
+							struct drm_dp_mst_branch,
+							destroy_next);
+			if (mstb)
+				list_del(&mstb->destroy_next);
+			mutex_unlock(&mgr->delayed_destroy_lock);
+
+			if (!mstb)
+				break;
+
+			drm_dp_delayed_destroy_mstb(mstb);
+			go_again = true;
 		}
-		list_del(&port->next);
-		mutex_unlock(&mgr->destroy_connector_lock);
 
-		mgr->cbs->destroy_connector(mgr, port->connector);
+		for (;;) {
+			struct drm_dp_mst_port *port;
 
-		drm_dp_port_teardown_pdt(port, port->pdt);
-		port->pdt = DP_PEER_DEVICE_NONE;
+			mutex_lock(&mgr->delayed_destroy_lock);
+			port = list_first_entry_or_null(&mgr->destroy_port_list,
+							struct drm_dp_mst_port,
+							next);
+			if (port)
+				list_del(&port->next);
+			mutex_unlock(&mgr->delayed_destroy_lock);
+
+			if (!port)
+				break;
+
+			drm_dp_delayed_destroy_port(port);
+			send_hotplug = true;
+			go_again = true;
+		}
+	} while (go_again);
 
-		drm_dp_mst_put_port_malloc(port);
-		send_hotplug = true;
-	}
 	if (send_hotplug)
 		drm_kms_helper_hotplug_event(mgr->dev);
 }
@@ -4209,12 +4262,13 @@ int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr,
 	mutex_init(&mgr->lock);
 	mutex_init(&mgr->qlock);
 	mutex_init(&mgr->payload_lock);
-	mutex_init(&mgr->destroy_connector_lock);
+	mutex_init(&mgr->delayed_destroy_lock);
 	INIT_LIST_HEAD(&mgr->tx_msg_downq);
-	INIT_LIST_HEAD(&mgr->destroy_connector_list);
+	INIT_LIST_HEAD(&mgr->destroy_port_list);
+	INIT_LIST_HEAD(&mgr->destroy_branch_device_list);
 	INIT_WORK(&mgr->work, drm_dp_mst_link_probe_work);
 	INIT_WORK(&mgr->tx_work, drm_dp_tx_work);
-	INIT_WORK(&mgr->destroy_connector_work, drm_dp_destroy_connector_work);
+	INIT_WORK(&mgr->delayed_destroy_work, drm_dp_delayed_destroy_work);
 	init_waitqueue_head(&mgr->tx_waitq);
 	mgr->dev = dev;
 	mgr->aux = aux;
@@ -4255,7 +4309,7 @@ void drm_dp_mst_topology_mgr_destroy(struct drm_dp_mst_topology_mgr *mgr)
 {
 	drm_dp_mst_topology_mgr_set_mst(mgr, false);
 	flush_work(&mgr->work);
-	flush_work(&mgr->destroy_connector_work);
+	cancel_work_sync(&mgr->delayed_destroy_work);
 	mutex_lock(&mgr->payload_lock);
 	kfree(mgr->payloads);
 	mgr->payloads = NULL;
@@ -4267,7 +4321,7 @@ void drm_dp_mst_topology_mgr_destroy(struct drm_dp_mst_topology_mgr *mgr)
 	drm_atomic_private_obj_fini(&mgr->base);
 	mgr->funcs = NULL;
 
-	mutex_destroy(&mgr->destroy_connector_lock);
+	mutex_destroy(&mgr->delayed_destroy_lock);
 	mutex_destroy(&mgr->payload_lock);
 	mutex_destroy(&mgr->qlock);
 	mutex_destroy(&mgr->lock);
diff --git a/include/drm/drm_dp_mst_helper.h b/include/drm/drm_dp_mst_helper.h
index 4a25e0577ae0..b2160c366fb7 100644
--- a/include/drm/drm_dp_mst_helper.h
+++ b/include/drm/drm_dp_mst_helper.h
@@ -143,6 +143,12 @@ struct drm_dp_mst_branch {
 	 */
 	struct kref malloc_kref;
 
+	/**
+	 * @destroy_next: linked-list entry used by
+	 * drm_dp_delayed_destroy_work()
+	 */
+	struct list_head destroy_next;
+
 	u8 rad[8];
 	u8 lct;
 	int num_ports;
@@ -571,18 +577,24 @@ struct drm_dp_mst_topology_mgr {
 	struct work_struct tx_work;
 
 	/**
-	 * @destroy_connector_list: List of to be destroyed connectors.
+	 * @destroy_port_list: List of to be destroyed connectors.
+	 */
+	struct list_head destroy_port_list;
+	/**
+	 * @destroy_branch_device_list: List of to be destroyed branch
+	 * devices.
 	 */
-	struct list_head destroy_connector_list;
+	struct list_head destroy_branch_device_list;
 	/**
-	 * @destroy_connector_lock: Protects @connector_list.
+	 * @delayed_destroy_lock: Protects @destroy_port_list and
+	 * @destroy_branch_device_list.
 	 */
-	struct mutex destroy_connector_lock;
+	struct mutex delayed_destroy_lock;
 	/**
-	 * @destroy_connector_work: Work item to destroy connectors. Needed to
-	 * avoid locking inversion.
+	 * @delayed_destroy_work: Work item to destroy MST port and branch
+	 * devices, needed to avoid locking inversion.
 	 */
-	struct work_struct destroy_connector_work;
+	struct work_struct delayed_destroy_work;
 };
 
 int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr,
-- 
2.21.0


  reply	other threads:[~2019-10-22  2:39 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-22  2:35 [PATCH v5 00/14] DP MST Refactors + debugging tools + suspend/resume reprobing Lyude Paul
2019-10-22  2:35 ` Lyude Paul [this message]
2019-10-22  2:35 ` [PATCH v5 02/14] drm/dp_mst: Remove PDT teardown in drm_dp_destroy_port() and refactor Lyude Paul
2019-10-22  2:35 ` [PATCH v5 03/14] drm/dp_mst: Refactor pdt setup/teardown, add more locking Lyude Paul
2019-10-22  2:35 ` [PATCH v5 04/14] drm/dp_mst: Handle UP requests asynchronously Lyude Paul
2019-10-22  2:36 ` [PATCH v5 05/14] drm/dp_mst: Add probe_lock Lyude Paul
2019-10-22 16:06   ` Sean Paul
2019-10-22  2:36 ` [PATCH v5 06/14] drm/dp_mst: Protect drm_dp_mst_port members with locking Lyude Paul
2019-10-22  2:36   ` Lyude Paul
2019-10-22 20:08   ` Sean Paul
2019-10-22 20:08     ` Sean Paul
2019-10-22  2:36 ` [PATCH v5 07/14] drm/dp_mst: Don't forget to update port->input in drm_dp_mst_handle_conn_stat() Lyude Paul
2019-10-22 20:14   ` Sean Paul
2019-10-22  2:36 ` [PATCH v5 08/14] drm/dp_mst: Lessen indenting in drm_dp_mst_topology_mgr_resume() Lyude Paul
2019-10-22  2:36 ` [PATCH v5 09/14] drm/nouveau: Don't grab runtime PM refs for HPD IRQs Lyude Paul
2019-10-22  2:36 ` [PATCH v5 10/14] drm/nouveau: Resume hotplug interrupts earlier Lyude Paul
2019-10-22  2:36 ` [PATCH v5 11/14] drm/amdgpu: Iterate through DRM connectors correctly Lyude Paul
2019-10-22  2:36   ` Lyude Paul
2019-10-22  2:36 ` [PATCH v5 12/14] drm/amdgpu/dm: Resume short HPD IRQs before resuming MST topology Lyude Paul
2019-10-22  2:36 ` [PATCH v5 13/14] drm/dp_mst: Add basic topology reprobing when resuming Lyude Paul
2019-10-22  2:36   ` Lyude Paul
2019-10-22  2:36 ` [PATCH v5 14/14] drm/dp_mst: Add topology ref history tracking for debugging Lyude Paul
2019-10-22  2:36   ` Lyude Paul
2019-10-22  2:54 ` ✗ Fi.CI.CHECKPATCH: warning for DP MST Refactors + debugging tools + suspend/resume reprobing Patchwork
2019-10-22  3:02 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-10-22  3:19 ` ✓ Fi.CI.BAT: success " Patchwork
2019-10-22 12:46 ` ✓ Fi.CI.IGT: " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191022023641.8026-2-lyude@redhat.com \
    --to=lyude@redhat.com \
    --cc=airlied@linux.ie \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=daniel@ffwll.ch \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=hwentlan@amd.com \
    --cc=imre.deak@intel.com \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=juston.li@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=maarten.lankhorst@linux.intel.com \
    --cc=mripard@kernel.org \
    --cc=nouveau@lists.freedesktop.org \
    --cc=sean@poorly.run \
    --cc=ville.syrjala@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.