linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 1/2] drm/nouveau: Don't enabling polling twice on runtime resume
@ 2017-01-12  2:25 Lyude
  2017-01-12  2:25 ` [PATCH 2/2] drm/nouveau: Handle fbcon suspend/resume in seperate worker Lyude
  0 siblings, 1 reply; 3+ messages in thread
From: Lyude @ 2017-01-12  2:25 UTC (permalink / raw)
  To: nouveau
  Cc: Lyude, Hans de Goede, Kilian Singer, Lukas Wunner, David Airlie,
	Ben Skeggs, David Airlie, dri-devel, linux-kernel

As it turns out, on cards that actually have CRTCs on them we're already
calling drm_kms_helper_poll_enable(drm_dev) from
nouveau_display_resume() before we call it in
nouveau_pmops_runtime_resume(). This leads us to accidentally try to
enable polling twice, which results in a potential deadlock between the
RPM locks and drm_dev->mode_config.mutex if we end up trying to enable
polling the second time while output_poll_execute is running and holding
the mode_config lock. As such, make sure we only enable polling in
nouveau_pmops_runtime_resume() if we need to.

This fixes hangs observed on the ThinkPad W541.

Signed-off-by: Lyude <lyude@redhat.com>
Cc: Hans de Goede <hdegoede@redhat.com>
Cc: Kilian Singer <kilian.singer@quantumtechnology.info>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: David Airlie <airlied@redhat.com>
---
Changes since v1:
 - Rebase to work with master

 drivers/gpu/drm/nouveau/nouveau_display.c | 3 ++-
 drivers/gpu/drm/nouveau/nouveau_drm.c     | 5 ++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index cef08da..6a15776 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -411,7 +411,8 @@ nouveau_display_init(struct drm_device *dev)
 		return ret;
 
 	/* enable polling for external displays */
-	drm_kms_helper_poll_enable(dev);
+	if (!dev->mode_config.poll_enabled)
+		drm_kms_helper_poll_enable(dev);
 
 	/* enable hotplug interrupts */
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 59348fc..bc85a45 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -773,7 +773,10 @@ nouveau_pmops_runtime_resume(struct device *dev)
 	pci_set_master(pdev);
 
 	ret = nouveau_do_resume(drm_dev, true);
-	drm_kms_helper_poll_enable(drm_dev);
+
+	if (!drm_dev->mode_config.poll_enabled)
+		drm_kms_helper_poll_enable(drm_dev);
+
 	/* do magic */
 	nvif_mask(&device->object, 0x088488, (1 << 25), (1 << 25));
 	vga_switcheroo_set_dynamic_switch(pdev, VGA_SWITCHEROO_ON);
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/2] drm/nouveau: Handle fbcon suspend/resume in seperate worker
  2017-01-12  2:25 [PATCH v2 1/2] drm/nouveau: Don't enabling polling twice on runtime resume Lyude
@ 2017-01-12  2:25 ` Lyude
  2017-01-12  8:27   ` Hans de Goede
  0 siblings, 1 reply; 3+ messages in thread
From: Lyude @ 2017-01-12  2:25 UTC (permalink / raw)
  To: nouveau
  Cc: Lyude, Hans de Goede, Kilian Singer, Lukas Wunner, David Airlie,
	Ben Skeggs, David Airlie, dri-devel, linux-kernel

Resuming from RPM can happen while already holding
dev->mode_config.mutex. This means we can't actually handle fbcon in
any RPM resume workers, since restoring fbcon requires grabbing
dev->mode_config.mutex again. So move the fbcon suspend/resume code into
its own worker, and rely on that instead to avoid deadlocking.

This fixes more deadlocks for runtime suspending the GPU on the ThinkPad
W541. Reproduction recipe:

 - Get a machine with both Optimus and an NVIDIA card with connectors
   attached to it
 - Wait for the nvidia GPU to suspend
 - Attempt to manually reprobe any of the connectors on the nvidia GPU
   using sysfs
 - *deadlock*

Signed-off-by: Lyude <lyude@redhat.com>
Cc: Hans de Goede <hdegoede@redhat.com>
Cc: Kilian Singer <kilian.singer@quantumtechnology.info>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: David Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_drv.h   |  2 ++
 drivers/gpu/drm/nouveau/nouveau_fbcon.c | 43 ++++++++++++++++++++++++++-------
 2 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 8d5ed5b..42c1fa5 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -165,6 +165,8 @@ struct nouveau_drm {
 	struct backlight_device *backlight;
 	struct list_head bl_connectors;
 	struct work_struct hpd_work;
+	struct work_struct fbcon_work;
+	int fbcon_new_state;
 #ifdef CONFIG_ACPI
 	struct notifier_block acpi_nb;
 #endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index 2f2a3dc..87cd30b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -470,19 +470,43 @@ static const struct drm_fb_helper_funcs nouveau_fbcon_helper_funcs = {
 	.fb_probe = nouveau_fbcon_create,
 };
 
+static void
+nouveau_fbcon_set_suspend_work(struct work_struct *work)
+{
+	struct nouveau_drm *drm = container_of(work, typeof(*drm), fbcon_work);
+	int state = drm->fbcon_new_state;
+
+	if (state == FBINFO_STATE_RUNNING)
+		pm_runtime_get_sync(drm->dev->dev);
+
+	console_lock();
+	if (state == FBINFO_STATE_RUNNING)
+		nouveau_fbcon_accel_restore(drm->dev);
+	drm_fb_helper_set_suspend(&drm->fbcon->helper, state);
+	if (state != FBINFO_STATE_RUNNING)
+		nouveau_fbcon_accel_save_disable(drm->dev);
+	console_unlock();
+
+	if (state == FBINFO_STATE_RUNNING) {
+		pm_runtime_mark_last_busy(drm->dev->dev);
+		pm_runtime_put_sync(drm->dev->dev);
+	}
+}
+
 void
 nouveau_fbcon_set_suspend(struct drm_device *dev, int state)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	if (drm->fbcon) {
-		console_lock();
-		if (state == FBINFO_STATE_RUNNING)
-			nouveau_fbcon_accel_restore(dev);
-		drm_fb_helper_set_suspend(&drm->fbcon->helper, state);
-		if (state != FBINFO_STATE_RUNNING)
-			nouveau_fbcon_accel_save_disable(dev);
-		console_unlock();
-	}
+
+	if (!drm->fbcon)
+		return;
+
+	drm->fbcon_new_state = state;
+	/* Since runtime resume can happen as a result of a sysfs operation,
+	 * it's possible we already have the console locked. So handle fbcon
+	 * init/deinit from a separate work thread
+	 */
+	schedule_work(&drm->fbcon_work);
 }
 
 int
@@ -502,6 +526,7 @@ nouveau_fbcon_init(struct drm_device *dev)
 		return -ENOMEM;
 
 	drm->fbcon = fbcon;
+	INIT_WORK(&drm->fbcon_work, nouveau_fbcon_set_suspend_work);
 
 	drm_fb_helper_prepare(dev, &fbcon->helper, &nouveau_fbcon_helper_funcs);
 
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH 2/2] drm/nouveau: Handle fbcon suspend/resume in seperate worker
  2017-01-12  2:25 ` [PATCH 2/2] drm/nouveau: Handle fbcon suspend/resume in seperate worker Lyude
@ 2017-01-12  8:27   ` Hans de Goede
  0 siblings, 0 replies; 3+ messages in thread
From: Hans de Goede @ 2017-01-12  8:27 UTC (permalink / raw)
  To: Lyude, nouveau
  Cc: Kilian Singer, Lukas Wunner, David Airlie, Ben Skeggs,
	David Airlie, dri-devel, linux-kernel

Hi,

Good catch (both the previous patch as well as this one).
I've one small comment inline:

On 12-01-17 03:25, Lyude wrote:
> Resuming from RPM can happen while already holding
> dev->mode_config.mutex. This means we can't actually handle fbcon in
> any RPM resume workers, since restoring fbcon requires grabbing
> dev->mode_config.mutex again. So move the fbcon suspend/resume code into
> it's own worker, and rely on that instead to avoid deadlocking.
>
> This fixes more deadlocks for runtime suspending the GPU on the ThinkPad
> W541. Reproduction recipe:
>
>  - Get a machine with both optimus and a nvidia card with connectors
>    attached to it
>  - Wait for the nvidia GPU to suspend
>  - Attempt to manually reprobe any of the connectors on the nvidia GPU
>    using sysfs
>  - *deadlock*
>
> Signed-off-by: Lyude <lyude@redhat.com>
> Cc: Hans de Goede <hdegoede@redhat.com>
> Cc: Kilian Singer <kilian.singer@quantumtechnology.info>
> Cc: Lukas Wunner <lukas@wunner.de>
> Cc: David Airlie <airlied@redhat.com>
> ---
>  drivers/gpu/drm/nouveau/nouveau_drv.h   |  2 ++
>  drivers/gpu/drm/nouveau/nouveau_fbcon.c | 43 ++++++++++++++++++++++++++-------
>  2 files changed, 36 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
> index 8d5ed5b..42c1fa5 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_drv.h
> +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
> @@ -165,6 +165,8 @@ struct nouveau_drm {
>  	struct backlight_device *backlight;
>  	struct list_head bl_connectors;
>  	struct work_struct hpd_work;
> +	struct work_struct fbcon_work;
> +	int fbcon_new_state;
>  #ifdef CONFIG_ACPI
>  	struct notifier_block acpi_nb;
>  #endif
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
> index 2f2a3dc..87cd30b 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
> @@ -470,19 +470,43 @@ static const struct drm_fb_helper_funcs nouveau_fbcon_helper_funcs = {
>  	.fb_probe = nouveau_fbcon_create,
>  };
>
> +static void
> +nouveau_fbcon_set_suspend_work(struct work_struct *work)
> +{
> +	struct nouveau_drm *drm = container_of(work, typeof(*drm), fbcon_work);
> +	int state = drm->fbcon_new_state;

The compiler may decide to optimize away this local variable
and simply re-read drm->fbcon_new_state in each of the if-s
below, which is racy because nouveau_fbcon_set_suspend() can
change it in the meantime. I would fix this by making
drm->fbcon_new_state an atomic_t and using

atomic_read(&drm->fbcon_new_state) here.
(and atomic_set below).

Regards,

Hans



> +
> +	if (state == FBINFO_STATE_RUNNING)
> +		pm_runtime_get_sync(drm->dev->dev);
> +
> +	console_lock();
> +	if (state == FBINFO_STATE_RUNNING)
> +		nouveau_fbcon_accel_restore(drm->dev);
> +	drm_fb_helper_set_suspend(&drm->fbcon->helper, state);
> +	if (state != FBINFO_STATE_RUNNING)
> +		nouveau_fbcon_accel_save_disable(drm->dev);
> +	console_unlock();
> +
> +	if (state == FBINFO_STATE_RUNNING) {
> +		pm_runtime_mark_last_busy(drm->dev->dev);
> +		pm_runtime_put_sync(drm->dev->dev);
> +	}
> +}
> +
>  void
>  nouveau_fbcon_set_suspend(struct drm_device *dev, int state)
>  {
>  	struct nouveau_drm *drm = nouveau_drm(dev);
> -	if (drm->fbcon) {
> -		console_lock();
> -		if (state == FBINFO_STATE_RUNNING)
> -			nouveau_fbcon_accel_restore(dev);
> -		drm_fb_helper_set_suspend(&drm->fbcon->helper, state);
> -		if (state != FBINFO_STATE_RUNNING)
> -			nouveau_fbcon_accel_save_disable(dev);
> -		console_unlock();
> -	}
> +
> +	if (!drm->fbcon)
> +		return;
> +
> +	drm->fbcon_new_state = state;
> +	/* Since runtime resume can happen as a result of a sysfs operation,
> +	 * it's possible we already have the console locked. So handle fbcon
> +	 * init/deinit from a seperate work thread
> +	 */
> +	schedule_work(&drm->fbcon_work);
>  }
>
>  int
> @@ -502,6 +526,7 @@ nouveau_fbcon_init(struct drm_device *dev)
>  		return -ENOMEM;
>
>  	drm->fbcon = fbcon;
> +	INIT_WORK(&drm->fbcon_work, nouveau_fbcon_set_suspend_work);
>
>  	drm_fb_helper_prepare(dev, &fbcon->helper, &nouveau_fbcon_helper_funcs);
>
>

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2017-01-12  8:27 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-01-12  2:25 [PATCH v2 1/2] drm/nouveau: Don't enabling polling twice on runtime resume Lyude
2017-01-12  2:25 ` [PATCH 2/2] drm/nouveau: Handle fbcon suspend/resume in seperate worker Lyude
2017-01-12  8:27   ` Hans de Goede

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).