All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amd/display: Add fast path for cursor plane updates
@ 2018-12-05 19:59 Nicholas Kazlauskas
       [not found] ` <20181205195907.9501-1-nicholas.kazlauskas-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Nicholas Kazlauskas @ 2018-12-05 19:59 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Leo Li, Harry Wentland, Nicholas Kazlauskas

[Why]
Legacy cursor plane updates from drm helpers go through the full
atomic codepath. A high volume of cursor updates through this slow
code path can cause subsequent page-flips to skip vblank intervals
since each individual update is slow.

This problem is particularly noticeable for the compton compositor.

[How]
A fast path for cursor plane updates is added by using DRM asynchronous
commit support provided by async_check and async_update. These don't do
a full state/flip_done dependency stall and they don't block other
commit work.

However, DC still expects itself to be single-threaded for anything
that can issue register writes. Screen corruption or hangs can occur
if write sequences overlap. Every call that potentially perform
register writes needs to be guarded for asynchronous updates to work.
The dc_lock mutex was added for this.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175

Cc: Leo Li <sunpeng.li@amd.com>
Cc: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 67 ++++++++++++++++++-
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  8 +++
 2 files changed, 73 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 23d61570df17..4df14a50a8ac 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -57,6 +57,7 @@
 
 #include <drm/drmP.h>
 #include <drm/drm_atomic.h>
+#include <drm/drm_atomic_uapi.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_dp_mst_helper.h>
 #include <drm/drm_fb_helper.h>
@@ -133,6 +134,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state);
 static int amdgpu_dm_atomic_check(struct drm_device *dev,
 				  struct drm_atomic_state *state);
 
+static void handle_cursor_update(struct drm_plane *plane,
+				 struct drm_plane_state *old_plane_state);
 
 
 
@@ -402,6 +405,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 	/* Zero all the fields */
 	memset(&init_data, 0, sizeof(init_data));
 
+	mutex_init(&adev->dm.dc_lock);
+
 	if(amdgpu_dm_irq_init(adev)) {
 		DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n");
 		goto error;
@@ -516,6 +521,9 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
 	/* DC Destroy TODO: Replace destroy DAL */
 	if (adev->dm.dc)
 		dc_destroy(&adev->dm.dc);
+
+	mutex_destroy(&adev->dm.dc_lock);
+
 	return;
 }
 
@@ -3615,10 +3623,43 @@ static int dm_plane_atomic_check(struct drm_plane *plane,
 	return -EINVAL;
 }
 
+static int dm_plane_atomic_async_check(struct drm_plane *plane,
+				       struct drm_plane_state *new_plane_state)
+{
+	/* Only support async updates on cursor planes. */
+	if (plane->type != DRM_PLANE_TYPE_CURSOR)
+		return -EINVAL;
+
+	return 0;
+}
+
+static void dm_plane_atomic_async_update(struct drm_plane *plane,
+					 struct drm_plane_state *new_state)
+{
+	struct drm_plane_state *old_state =
+		drm_atomic_get_old_plane_state(new_state->state, plane);
+
+	if (plane->state->fb != new_state->fb)
+		drm_atomic_set_fb_for_plane(plane->state, new_state->fb);
+
+	plane->state->src_x = new_state->src_x;
+	plane->state->src_y = new_state->src_y;
+	plane->state->src_w = new_state->src_w;
+	plane->state->src_h = new_state->src_h;
+	plane->state->crtc_x = new_state->crtc_x;
+	plane->state->crtc_y = new_state->crtc_y;
+	plane->state->crtc_w = new_state->crtc_w;
+	plane->state->crtc_h = new_state->crtc_h;
+
+	handle_cursor_update(plane, old_state);
+}
+
 static const struct drm_plane_helper_funcs dm_plane_helper_funcs = {
 	.prepare_fb = dm_plane_helper_prepare_fb,
 	.cleanup_fb = dm_plane_helper_cleanup_fb,
 	.atomic_check = dm_plane_atomic_check,
+	.atomic_async_check = dm_plane_atomic_async_check,
+	.atomic_async_update = dm_plane_atomic_async_update
 };
 
 /*
@@ -4307,6 +4348,7 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,
 static void handle_cursor_update(struct drm_plane *plane,
 				 struct drm_plane_state *old_plane_state)
 {
+	struct amdgpu_device *adev = plane->dev->dev_private;
 	struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb);
 	struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc;
 	struct dm_crtc_state *crtc_state = crtc ? to_dm_crtc_state(crtc->state) : NULL;
@@ -4331,9 +4373,12 @@ static void handle_cursor_update(struct drm_plane *plane,
 
 	if (!position.enable) {
 		/* turn off cursor */
-		if (crtc_state && crtc_state->stream)
+		if (crtc_state && crtc_state->stream) {
+			mutex_lock(&adev->dm.dc_lock);
 			dc_stream_set_cursor_position(crtc_state->stream,
 						      &position);
+			mutex_unlock(&adev->dm.dc_lock);
+		}
 		return;
 	}
 
@@ -4351,6 +4396,7 @@ static void handle_cursor_update(struct drm_plane *plane,
 	attributes.pitch = attributes.width;
 
 	if (crtc_state->stream) {
+		mutex_lock(&adev->dm.dc_lock);
 		if (!dc_stream_set_cursor_attributes(crtc_state->stream,
 							 &attributes))
 			DRM_ERROR("DC failed to set cursor attributes\n");
@@ -4358,6 +4404,7 @@ static void handle_cursor_update(struct drm_plane *plane,
 		if (!dc_stream_set_cursor_position(crtc_state->stream,
 						   &position))
 			DRM_ERROR("DC failed to set cursor position\n");
+		mutex_unlock(&adev->dm.dc_lock);
 	}
 }
 
@@ -4573,6 +4620,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
 				&acrtc_state->stream->vrr_infopacket;
 	}
 
+	mutex_lock(&adev->dm.dc_lock);
 	dc_commit_updates_for_stream(adev->dm.dc,
 					     surface_updates,
 					     1,
@@ -4580,6 +4628,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
 					     &stream_update,
 					     &surface_updates->surface,
 					     state);
+	mutex_unlock(&adev->dm.dc_lock);
 
 	DRM_DEBUG_DRIVER("%s Flipping to hi: 0x%x, low: 0x%x \n",
 			 __func__,
@@ -4594,6 +4643,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
  * with a dc_plane_state and follow the atomic model a bit more closely here.
  */
 static bool commit_planes_to_stream(
+		struct amdgpu_display_manager *dm,
 		struct dc *dc,
 		struct dc_plane_state **plane_states,
 		uint8_t new_plane_count,
@@ -4670,11 +4720,13 @@ static bool commit_planes_to_stream(
 		updates[i].scaling_info = &scaling_info[i];
 	}
 
+	mutex_lock(&dm->dc_lock);
 	dc_commit_updates_for_stream(
 			dc,
 			updates,
 			new_plane_count,
 			dc_stream, stream_update, plane_states, state);
+	mutex_unlock(&dm->dc_lock);
 
 	kfree(flip_addr);
 	kfree(plane_info);
@@ -4780,7 +4832,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 
 		dc_stream_attach->abm_level = acrtc_state->abm_level;
 
-		if (false == commit_planes_to_stream(dm->dc,
+		if (false == commit_planes_to_stream(dm,
+							dm->dc,
 							plane_states_constructed,
 							planes_count,
 							acrtc_state,
@@ -4950,7 +5003,9 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 
 	if (dc_state) {
 		dm_enable_per_frame_crtc_master_sync(dc_state);
+		mutex_lock(&dm->dc_lock);
 		WARN_ON(!dc_commit_state(dm->dc, dc_state));
+		mutex_unlock(&dm->dc_lock);
 	}
 
 	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
@@ -5012,6 +5067,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 
 		/*TODO How it works with MPO ?*/
 		if (!commit_planes_to_stream(
+				dm,
 				dm->dc,
 				status->plane_states,
 				status->plane_count,
@@ -5904,6 +5960,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
 			ret = -EINVAL;
 			goto fail;
 		}
+	} else if (state->legacy_cursor_update) {
+		/*
+		 * This is a fast cursor update coming from the plane update
+		 * helper, check if it can be done asynchronously for better
+		 * performance.
+		 */
+		state->async_update = !drm_atomic_helper_async_check(dev, state);
 	}
 
 	/* Must be success */
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 4326dc256491..25bb91ee80ba 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -134,6 +134,14 @@ struct amdgpu_display_manager {
 
 	struct drm_modeset_lock atomic_obj_lock;
 
+	/**
+	 * @dc_lock:
+	 *
+	 * Guards access to DC functions that can issue register write
+	 * sequences.
+	 */
+	struct mutex dc_lock;
+
 	/**
 	 * @irq_handler_list_low_tab:
 	 *
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH] drm/amd/display: Add fast path for cursor plane updates
       [not found] ` <20181205195907.9501-1-nicholas.kazlauskas-5C7GfCeVMHo@public.gmane.org>
@ 2018-12-05 20:26   ` Grodzovsky, Andrey
       [not found]     ` <8b305d43-56e1-5505-95d3-35e92f3f8494-5C7GfCeVMHo@public.gmane.org>
  2018-12-11 22:30   ` Li, Sun peng (Leo)
  2018-12-13 15:48   ` Michel Dänzer
  2 siblings, 1 reply; 16+ messages in thread
From: Grodzovsky, Andrey @ 2018-12-05 20:26 UTC (permalink / raw)
  To: Kazlauskas, Nicholas, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Li, Sun peng (Leo), Wentland, Harry



On 12/05/2018 02:59 PM, Nicholas Kazlauskas wrote:
> [Why]
> Legacy cursor plane updates from drm helpers go through the full
> atomic codepath. A high volume of cursor updates through this slow
> code path can cause subsequent page-flips to skip vblank intervals
> since each individual update is slow.
>
> This problem is particularly noticeable for the compton compositor.
>
> [How]
> A fast path for cursor plane updates is added by using DRM asynchronous
> commit support provided by async_check and async_update. These don't do
> a full state/flip_done dependency stall and they don't block other
> commit work.
>
> However, DC still expects itself to be single-threaded for anything
> that can issue register writes. Screen corruption or hangs can occur
> if write sequences overlap. Every call that potentially perform
> register writes needs to be guarded for asynchronous updates to work.
> The dc_lock mutex was added for this.

As far as page flip related register writes concerned this I think this 
was never an issue - we always
supported fully concurrent page flips on different CRTCs meaning writing 
surface address concurrently
on different pipes. So Are you sure you  can't do cursor update 
concurrently against at least page flips ?
I am not sure about full updates.

Andrey

>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175
>
> Cc: Leo Li <sunpeng.li@amd.com>
> Cc: Harry Wentland <harry.wentland@amd.com>
> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
> ---
>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 67 ++++++++++++++++++-
>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  8 +++
>   2 files changed, 73 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 23d61570df17..4df14a50a8ac 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -57,6 +57,7 @@
>   
>   #include <drm/drmP.h>
>   #include <drm/drm_atomic.h>
> +#include <drm/drm_atomic_uapi.h>
>   #include <drm/drm_atomic_helper.h>
>   #include <drm/drm_dp_mst_helper.h>
>   #include <drm/drm_fb_helper.h>
> @@ -133,6 +134,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state);
>   static int amdgpu_dm_atomic_check(struct drm_device *dev,
>   				  struct drm_atomic_state *state);
>   
> +static void handle_cursor_update(struct drm_plane *plane,
> +				 struct drm_plane_state *old_plane_state);
>   
>   
>   
> @@ -402,6 +405,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
>   	/* Zero all the fields */
>   	memset(&init_data, 0, sizeof(init_data));
>   
> +	mutex_init(&adev->dm.dc_lock);
> +
>   	if(amdgpu_dm_irq_init(adev)) {
>   		DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n");
>   		goto error;
> @@ -516,6 +521,9 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
>   	/* DC Destroy TODO: Replace destroy DAL */
>   	if (adev->dm.dc)
>   		dc_destroy(&adev->dm.dc);
> +
> +	mutex_destroy(&adev->dm.dc_lock);
> +
>   	return;
>   }
>   
> @@ -3615,10 +3623,43 @@ static int dm_plane_atomic_check(struct drm_plane *plane,
>   	return -EINVAL;
>   }
>   
> +static int dm_plane_atomic_async_check(struct drm_plane *plane,
> +				       struct drm_plane_state *new_plane_state)
> +{
> +	/* Only support async updates on cursor planes. */
> +	if (plane->type != DRM_PLANE_TYPE_CURSOR)
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
> +static void dm_plane_atomic_async_update(struct drm_plane *plane,
> +					 struct drm_plane_state *new_state)
> +{
> +	struct drm_plane_state *old_state =
> +		drm_atomic_get_old_plane_state(new_state->state, plane);
> +
> +	if (plane->state->fb != new_state->fb)
> +		drm_atomic_set_fb_for_plane(plane->state, new_state->fb);
> +
> +	plane->state->src_x = new_state->src_x;
> +	plane->state->src_y = new_state->src_y;
> +	plane->state->src_w = new_state->src_w;
> +	plane->state->src_h = new_state->src_h;
> +	plane->state->crtc_x = new_state->crtc_x;
> +	plane->state->crtc_y = new_state->crtc_y;
> +	plane->state->crtc_w = new_state->crtc_w;
> +	plane->state->crtc_h = new_state->crtc_h;
> +
> +	handle_cursor_update(plane, old_state);
> +}
> +
>   static const struct drm_plane_helper_funcs dm_plane_helper_funcs = {
>   	.prepare_fb = dm_plane_helper_prepare_fb,
>   	.cleanup_fb = dm_plane_helper_cleanup_fb,
>   	.atomic_check = dm_plane_atomic_check,
> +	.atomic_async_check = dm_plane_atomic_async_check,
> +	.atomic_async_update = dm_plane_atomic_async_update
>   };
>   
>   /*
> @@ -4307,6 +4348,7 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,
>   static void handle_cursor_update(struct drm_plane *plane,
>   				 struct drm_plane_state *old_plane_state)
>   {
> +	struct amdgpu_device *adev = plane->dev->dev_private;
>   	struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb);
>   	struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc;
>   	struct dm_crtc_state *crtc_state = crtc ? to_dm_crtc_state(crtc->state) : NULL;
> @@ -4331,9 +4373,12 @@ static void handle_cursor_update(struct drm_plane *plane,
>   
>   	if (!position.enable) {
>   		/* turn off cursor */
> -		if (crtc_state && crtc_state->stream)
> +		if (crtc_state && crtc_state->stream) {
> +			mutex_lock(&adev->dm.dc_lock);
>   			dc_stream_set_cursor_position(crtc_state->stream,
>   						      &position);
> +			mutex_unlock(&adev->dm.dc_lock);
> +		}
>   		return;
>   	}
>   
> @@ -4351,6 +4396,7 @@ static void handle_cursor_update(struct drm_plane *plane,
>   	attributes.pitch = attributes.width;
>   
>   	if (crtc_state->stream) {
> +		mutex_lock(&adev->dm.dc_lock);
>   		if (!dc_stream_set_cursor_attributes(crtc_state->stream,
>   							 &attributes))
>   			DRM_ERROR("DC failed to set cursor attributes\n");
> @@ -4358,6 +4404,7 @@ static void handle_cursor_update(struct drm_plane *plane,
>   		if (!dc_stream_set_cursor_position(crtc_state->stream,
>   						   &position))
>   			DRM_ERROR("DC failed to set cursor position\n");
> +		mutex_unlock(&adev->dm.dc_lock);
>   	}
>   }
>   
> @@ -4573,6 +4620,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>   				&acrtc_state->stream->vrr_infopacket;
>   	}
>   
> +	mutex_lock(&adev->dm.dc_lock);
>   	dc_commit_updates_for_stream(adev->dm.dc,
>   					     surface_updates,
>   					     1,
> @@ -4580,6 +4628,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>   					     &stream_update,
>   					     &surface_updates->surface,
>   					     state);
> +	mutex_unlock(&adev->dm.dc_lock);
>   
>   	DRM_DEBUG_DRIVER("%s Flipping to hi: 0x%x, low: 0x%x \n",
>   			 __func__,
> @@ -4594,6 +4643,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>    * with a dc_plane_state and follow the atomic model a bit more closely here.
>    */
>   static bool commit_planes_to_stream(
> +		struct amdgpu_display_manager *dm,
>   		struct dc *dc,
>   		struct dc_plane_state **plane_states,
>   		uint8_t new_plane_count,
> @@ -4670,11 +4720,13 @@ static bool commit_planes_to_stream(
>   		updates[i].scaling_info = &scaling_info[i];
>   	}
>   
> +	mutex_lock(&dm->dc_lock);
>   	dc_commit_updates_for_stream(
>   			dc,
>   			updates,
>   			new_plane_count,
>   			dc_stream, stream_update, plane_states, state);
> +	mutex_unlock(&dm->dc_lock);
>   
>   	kfree(flip_addr);
>   	kfree(plane_info);
> @@ -4780,7 +4832,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
>   
>   		dc_stream_attach->abm_level = acrtc_state->abm_level;
>   
> -		if (false == commit_planes_to_stream(dm->dc,
> +		if (false == commit_planes_to_stream(dm,
> +							dm->dc,
>   							plane_states_constructed,
>   							planes_count,
>   							acrtc_state,
> @@ -4950,7 +5003,9 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
>   
>   	if (dc_state) {
>   		dm_enable_per_frame_crtc_master_sync(dc_state);
> +		mutex_lock(&dm->dc_lock);
>   		WARN_ON(!dc_commit_state(dm->dc, dc_state));
> +		mutex_unlock(&dm->dc_lock);
>   	}
>   
>   	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
> @@ -5012,6 +5067,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
>   
>   		/*TODO How it works with MPO ?*/
>   		if (!commit_planes_to_stream(
> +				dm,
>   				dm->dc,
>   				status->plane_states,
>   				status->plane_count,
> @@ -5904,6 +5960,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
>   			ret = -EINVAL;
>   			goto fail;
>   		}
> +	} else if (state->legacy_cursor_update) {
> +		/*
> +		 * This is a fast cursor update coming from the plane update
> +		 * helper, check if it can be done asynchronously for better
> +		 * performance.
> +		 */
> +		state->async_update = !drm_atomic_helper_async_check(dev, state);
>   	}
>   
>   	/* Must be success */
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> index 4326dc256491..25bb91ee80ba 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> @@ -134,6 +134,14 @@ struct amdgpu_display_manager {
>   
>   	struct drm_modeset_lock atomic_obj_lock;
>   
> +	/**
> +	 * @dc_lock:
> +	 *
> +	 * Guards access to DC functions that can issue register write
> +	 * sequences.
> +	 */
> +	struct mutex dc_lock;
> +
>   	/**
>   	 * @irq_handler_list_low_tab:
>   	 *

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] drm/amd/display: Add fast path for cursor plane updates
       [not found]     ` <8b305d43-56e1-5505-95d3-35e92f3f8494-5C7GfCeVMHo@public.gmane.org>
@ 2018-12-05 20:42       ` Nicholas Kazlauskas
       [not found]         ` <6352276b-5d47-94ea-2dd1-0f955ceed408-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Nicholas Kazlauskas @ 2018-12-05 20:42 UTC (permalink / raw)
  To: Grodzovsky, Andrey, Kazlauskas, Nicholas,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Li, Sun peng (Leo), Wentland, Harry

On 2018-12-05 3:26 p.m., Grodzovsky, Andrey wrote:
> 
> 
> On 12/05/2018 02:59 PM, Nicholas Kazlauskas wrote:
>> [Why]
>> Legacy cursor plane updates from drm helpers go through the full
>> atomic codepath. A high volume of cursor updates through this slow
>> code path can cause subsequent page-flips to skip vblank intervals
>> since each individual update is slow.
>>
>> This problem is particularly noticeable for the compton compositor.
>>
>> [How]
>> A fast path for cursor plane updates is added by using DRM asynchronous
>> commit support provided by async_check and async_update. These don't do
>> a full state/flip_done dependency stall and they don't block other
>> commit work.
>>
>> However, DC still expects itself to be single-threaded for anything
>> that can issue register writes. Screen corruption or hangs can occur
>> if write sequences overlap. Every call that potentially perform
>> register writes needs to be guarded for asynchronous updates to work.
>> The dc_lock mutex was added for this.
> 
> As far as page flip related register writes concerned this I think this
> was never an issue - we always
> supported fully concurrent page flips on different CRTCs meaning writing
> surface address concurrently
> on different pipes. So Are you sure you  can't do cursor update
> concurrently against at least page flips ?
> I am not sure about full updates.
> 
> Andrey

I think this was true before we started locking the pipes around cursor 
updates (and probably only for dce). The problem that occur here is 
writing to the lock register from multiple threads at the same time.

In general I think the "contract" between dm/dc now is that anything 
from dc that does register write sequences can't be called from multiple 
threads.

Nicholas Kazlauskas

> 
>>
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175
>>
>> Cc: Leo Li <sunpeng.li@amd.com>
>> Cc: Harry Wentland <harry.wentland@amd.com>
>> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
>> ---
>>    .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 67 ++++++++++++++++++-
>>    .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  8 +++
>>    2 files changed, 73 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> index 23d61570df17..4df14a50a8ac 100644
>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> @@ -57,6 +57,7 @@
>>    
>>    #include <drm/drmP.h>
>>    #include <drm/drm_atomic.h>
>> +#include <drm/drm_atomic_uapi.h>
>>    #include <drm/drm_atomic_helper.h>
>>    #include <drm/drm_dp_mst_helper.h>
>>    #include <drm/drm_fb_helper.h>
>> @@ -133,6 +134,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state);
>>    static int amdgpu_dm_atomic_check(struct drm_device *dev,
>>    				  struct drm_atomic_state *state);
>>    
>> +static void handle_cursor_update(struct drm_plane *plane,
>> +				 struct drm_plane_state *old_plane_state);
>>    
>>    
>>    
>> @@ -402,6 +405,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
>>    	/* Zero all the fields */
>>    	memset(&init_data, 0, sizeof(init_data));
>>    
>> +	mutex_init(&adev->dm.dc_lock);
>> +
>>    	if(amdgpu_dm_irq_init(adev)) {
>>    		DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n");
>>    		goto error;
>> @@ -516,6 +521,9 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
>>    	/* DC Destroy TODO: Replace destroy DAL */
>>    	if (adev->dm.dc)
>>    		dc_destroy(&adev->dm.dc);
>> +
>> +	mutex_destroy(&adev->dm.dc_lock);
>> +
>>    	return;
>>    }
>>    
>> @@ -3615,10 +3623,43 @@ static int dm_plane_atomic_check(struct drm_plane *plane,
>>    	return -EINVAL;
>>    }
>>    
>> +static int dm_plane_atomic_async_check(struct drm_plane *plane,
>> +				       struct drm_plane_state *new_plane_state)
>> +{
>> +	/* Only support async updates on cursor planes. */
>> +	if (plane->type != DRM_PLANE_TYPE_CURSOR)
>> +		return -EINVAL;
>> +
>> +	return 0;
>> +}
>> +
>> +static void dm_plane_atomic_async_update(struct drm_plane *plane,
>> +					 struct drm_plane_state *new_state)
>> +{
>> +	struct drm_plane_state *old_state =
>> +		drm_atomic_get_old_plane_state(new_state->state, plane);
>> +
>> +	if (plane->state->fb != new_state->fb)
>> +		drm_atomic_set_fb_for_plane(plane->state, new_state->fb);
>> +
>> +	plane->state->src_x = new_state->src_x;
>> +	plane->state->src_y = new_state->src_y;
>> +	plane->state->src_w = new_state->src_w;
>> +	plane->state->src_h = new_state->src_h;
>> +	plane->state->crtc_x = new_state->crtc_x;
>> +	plane->state->crtc_y = new_state->crtc_y;
>> +	plane->state->crtc_w = new_state->crtc_w;
>> +	plane->state->crtc_h = new_state->crtc_h;
>> +
>> +	handle_cursor_update(plane, old_state);
>> +}
>> +
>>    static const struct drm_plane_helper_funcs dm_plane_helper_funcs = {
>>    	.prepare_fb = dm_plane_helper_prepare_fb,
>>    	.cleanup_fb = dm_plane_helper_cleanup_fb,
>>    	.atomic_check = dm_plane_atomic_check,
>> +	.atomic_async_check = dm_plane_atomic_async_check,
>> +	.atomic_async_update = dm_plane_atomic_async_update
>>    };
>>    
>>    /*
>> @@ -4307,6 +4348,7 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,
>>    static void handle_cursor_update(struct drm_plane *plane,
>>    				 struct drm_plane_state *old_plane_state)
>>    {
>> +	struct amdgpu_device *adev = plane->dev->dev_private;
>>    	struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb);
>>    	struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc;
>>    	struct dm_crtc_state *crtc_state = crtc ? to_dm_crtc_state(crtc->state) : NULL;
>> @@ -4331,9 +4373,12 @@ static void handle_cursor_update(struct drm_plane *plane,
>>    
>>    	if (!position.enable) {
>>    		/* turn off cursor */
>> -		if (crtc_state && crtc_state->stream)
>> +		if (crtc_state && crtc_state->stream) {
>> +			mutex_lock(&adev->dm.dc_lock);
>>    			dc_stream_set_cursor_position(crtc_state->stream,
>>    						      &position);
>> +			mutex_unlock(&adev->dm.dc_lock);
>> +		}
>>    		return;
>>    	}
>>    
>> @@ -4351,6 +4396,7 @@ static void handle_cursor_update(struct drm_plane *plane,
>>    	attributes.pitch = attributes.width;
>>    
>>    	if (crtc_state->stream) {
>> +		mutex_lock(&adev->dm.dc_lock);
>>    		if (!dc_stream_set_cursor_attributes(crtc_state->stream,
>>    							 &attributes))
>>    			DRM_ERROR("DC failed to set cursor attributes\n");
>> @@ -4358,6 +4404,7 @@ static void handle_cursor_update(struct drm_plane *plane,
>>    		if (!dc_stream_set_cursor_position(crtc_state->stream,
>>    						   &position))
>>    			DRM_ERROR("DC failed to set cursor position\n");
>> +		mutex_unlock(&adev->dm.dc_lock);
>>    	}
>>    }
>>    
>> @@ -4573,6 +4620,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>>    				&acrtc_state->stream->vrr_infopacket;
>>    	}
>>    
>> +	mutex_lock(&adev->dm.dc_lock);
>>    	dc_commit_updates_for_stream(adev->dm.dc,
>>    					     surface_updates,
>>    					     1,
>> @@ -4580,6 +4628,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>>    					     &stream_update,
>>    					     &surface_updates->surface,
>>    					     state);
>> +	mutex_unlock(&adev->dm.dc_lock);
>>    
>>    	DRM_DEBUG_DRIVER("%s Flipping to hi: 0x%x, low: 0x%x \n",
>>    			 __func__,
>> @@ -4594,6 +4643,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>>     * with a dc_plane_state and follow the atomic model a bit more closely here.
>>     */
>>    static bool commit_planes_to_stream(
>> +		struct amdgpu_display_manager *dm,
>>    		struct dc *dc,
>>    		struct dc_plane_state **plane_states,
>>    		uint8_t new_plane_count,
>> @@ -4670,11 +4720,13 @@ static bool commit_planes_to_stream(
>>    		updates[i].scaling_info = &scaling_info[i];
>>    	}
>>    
>> +	mutex_lock(&dm->dc_lock);
>>    	dc_commit_updates_for_stream(
>>    			dc,
>>    			updates,
>>    			new_plane_count,
>>    			dc_stream, stream_update, plane_states, state);
>> +	mutex_unlock(&dm->dc_lock);
>>    
>>    	kfree(flip_addr);
>>    	kfree(plane_info);
>> @@ -4780,7 +4832,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
>>    
>>    		dc_stream_attach->abm_level = acrtc_state->abm_level;
>>    
>> -		if (false == commit_planes_to_stream(dm->dc,
>> +		if (false == commit_planes_to_stream(dm,
>> +							dm->dc,
>>    							plane_states_constructed,
>>    							planes_count,
>>    							acrtc_state,
>> @@ -4950,7 +5003,9 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
>>    
>>    	if (dc_state) {
>>    		dm_enable_per_frame_crtc_master_sync(dc_state);
>> +		mutex_lock(&dm->dc_lock);
>>    		WARN_ON(!dc_commit_state(dm->dc, dc_state));
>> +		mutex_unlock(&dm->dc_lock);
>>    	}
>>    
>>    	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
>> @@ -5012,6 +5067,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
>>    
>>    		/*TODO How it works with MPO ?*/
>>    		if (!commit_planes_to_stream(
>> +				dm,
>>    				dm->dc,
>>    				status->plane_states,
>>    				status->plane_count,
>> @@ -5904,6 +5960,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
>>    			ret = -EINVAL;
>>    			goto fail;
>>    		}
>> +	} else if (state->legacy_cursor_update) {
>> +		/*
>> +		 * This is a fast cursor update coming from the plane update
>> +		 * helper, check if it can be done asynchronously for better
>> +		 * performance.
>> +		 */
>> +		state->async_update = !drm_atomic_helper_async_check(dev, state);
>>    	}
>>    
>>    	/* Must be success */
>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
>> index 4326dc256491..25bb91ee80ba 100644
>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
>> @@ -134,6 +134,14 @@ struct amdgpu_display_manager {
>>    
>>    	struct drm_modeset_lock atomic_obj_lock;
>>    
>> +	/**
>> +	 * @dc_lock:
>> +	 *
>> +	 * Guards access to DC functions that can issue register write
>> +	 * sequences.
>> +	 */
>> +	struct mutex dc_lock;
>> +
>>    	/**
>>    	 * @irq_handler_list_low_tab:
>>    	 *
> 

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] drm/amd/display: Add fast path for cursor plane updates
       [not found]         ` <6352276b-5d47-94ea-2dd1-0f955ceed408-5C7GfCeVMHo@public.gmane.org>
@ 2018-12-05 20:44           ` Grodzovsky, Andrey
       [not found]             ` <73996dfc-1cd6-e707-9e2b-48a5c89d6cac-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Grodzovsky, Andrey @ 2018-12-05 20:44 UTC (permalink / raw)
  To: Kazlauskas, Nicholas, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Li, Sun peng (Leo), Wentland, Harry



On 12/05/2018 03:42 PM, Kazlauskas, Nicholas wrote:
> On 2018-12-05 3:26 p.m., Grodzovsky, Andrey wrote:
>>
>> On 12/05/2018 02:59 PM, Nicholas Kazlauskas wrote:
>>> [Why]
>>> Legacy cursor plane updates from drm helpers go through the full
>>> atomic codepath. A high volume of cursor updates through this slow
>>> code path can cause subsequent page-flips to skip vblank intervals
>>> since each individual update is slow.
>>>
>>> This problem is particularly noticeable for the compton compositor.
>>>
>>> [How]
>>> A fast path for cursor plane updates is added by using DRM asynchronous
>>> commit support provided by async_check and async_update. These don't do
>>> a full state/flip_done dependency stall and they don't block other
>>> commit work.
>>>
>>> However, DC still expects itself to be single-threaded for anything
>>> that can issue register writes. Screen corruption or hangs can occur
>>> if write sequences overlap. Every call that potentially perform
>>> register writes needs to be guarded for asynchronous updates to work.
>>> The dc_lock mutex was added for this.
>> As far as page flip related register writes concerned this I think this
>> was never an issue - we always
>> supported fully concurrent page flips on different CRTCs meaning writing
>> surface address concurrently
>> on different pipes. So Are you sure you  can't do cursor update
>> concurrently against at least page flips ?
>> I am not sure about full updates.
>>
>> Andrey
> I think this was true before we started locking the pipes around cursor
> updates (and probably only for dce). The problem that occur here is
> writing to the lock register from multiple threads at the same time.
>
> In general I think the "contract" between dm/dc now is that anything
> from dc that does register write sequences can't be called from multiple
> threads.
>
> Nicholas Kazlauskas

Ok, do note that you also serialize all the page flips now which looks 
unneeded - maybe consider r/w lock to at least avoid that.

Andrey

>
>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175
>>>
>>> Cc: Leo Li <sunpeng.li@amd.com>
>>> Cc: Harry Wentland <harry.wentland@amd.com>
>>> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
>>> ---
>>>     .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 67 ++++++++++++++++++-
>>>     .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  8 +++
>>>     2 files changed, 73 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>> index 23d61570df17..4df14a50a8ac 100644
>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>> @@ -57,6 +57,7 @@
>>>     
>>>     #include <drm/drmP.h>
>>>     #include <drm/drm_atomic.h>
>>> +#include <drm/drm_atomic_uapi.h>
>>>     #include <drm/drm_atomic_helper.h>
>>>     #include <drm/drm_dp_mst_helper.h>
>>>     #include <drm/drm_fb_helper.h>
>>> @@ -133,6 +134,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state);
>>>     static int amdgpu_dm_atomic_check(struct drm_device *dev,
>>>     				  struct drm_atomic_state *state);
>>>     
>>> +static void handle_cursor_update(struct drm_plane *plane,
>>> +				 struct drm_plane_state *old_plane_state);
>>>     
>>>     
>>>     
>>> @@ -402,6 +405,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
>>>     	/* Zero all the fields */
>>>     	memset(&init_data, 0, sizeof(init_data));
>>>     
>>> +	mutex_init(&adev->dm.dc_lock);
>>> +
>>>     	if(amdgpu_dm_irq_init(adev)) {
>>>     		DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n");
>>>     		goto error;
>>> @@ -516,6 +521,9 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
>>>     	/* DC Destroy TODO: Replace destroy DAL */
>>>     	if (adev->dm.dc)
>>>     		dc_destroy(&adev->dm.dc);
>>> +
>>> +	mutex_destroy(&adev->dm.dc_lock);
>>> +
>>>     	return;
>>>     }
>>>     
>>> @@ -3615,10 +3623,43 @@ static int dm_plane_atomic_check(struct drm_plane *plane,
>>>     	return -EINVAL;
>>>     }
>>>     
>>> +static int dm_plane_atomic_async_check(struct drm_plane *plane,
>>> +				       struct drm_plane_state *new_plane_state)
>>> +{
>>> +	/* Only support async updates on cursor planes. */
>>> +	if (plane->type != DRM_PLANE_TYPE_CURSOR)
>>> +		return -EINVAL;
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static void dm_plane_atomic_async_update(struct drm_plane *plane,
>>> +					 struct drm_plane_state *new_state)
>>> +{
>>> +	struct drm_plane_state *old_state =
>>> +		drm_atomic_get_old_plane_state(new_state->state, plane);
>>> +
>>> +	if (plane->state->fb != new_state->fb)
>>> +		drm_atomic_set_fb_for_plane(plane->state, new_state->fb);
>>> +
>>> +	plane->state->src_x = new_state->src_x;
>>> +	plane->state->src_y = new_state->src_y;
>>> +	plane->state->src_w = new_state->src_w;
>>> +	plane->state->src_h = new_state->src_h;
>>> +	plane->state->crtc_x = new_state->crtc_x;
>>> +	plane->state->crtc_y = new_state->crtc_y;
>>> +	plane->state->crtc_w = new_state->crtc_w;
>>> +	plane->state->crtc_h = new_state->crtc_h;
>>> +
>>> +	handle_cursor_update(plane, old_state);
>>> +}
>>> +
>>>     static const struct drm_plane_helper_funcs dm_plane_helper_funcs = {
>>>     	.prepare_fb = dm_plane_helper_prepare_fb,
>>>     	.cleanup_fb = dm_plane_helper_cleanup_fb,
>>>     	.atomic_check = dm_plane_atomic_check,
>>> +	.atomic_async_check = dm_plane_atomic_async_check,
>>> +	.atomic_async_update = dm_plane_atomic_async_update
>>>     };
>>>     
>>>     /*
>>> @@ -4307,6 +4348,7 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,
>>>     static void handle_cursor_update(struct drm_plane *plane,
>>>     				 struct drm_plane_state *old_plane_state)
>>>     {
>>> +	struct amdgpu_device *adev = plane->dev->dev_private;
>>>     	struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb);
>>>     	struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc;
>>>     	struct dm_crtc_state *crtc_state = crtc ? to_dm_crtc_state(crtc->state) : NULL;
>>> @@ -4331,9 +4373,12 @@ static void handle_cursor_update(struct drm_plane *plane,
>>>     
>>>     	if (!position.enable) {
>>>     		/* turn off cursor */
>>> -		if (crtc_state && crtc_state->stream)
>>> +		if (crtc_state && crtc_state->stream) {
>>> +			mutex_lock(&adev->dm.dc_lock);
>>>     			dc_stream_set_cursor_position(crtc_state->stream,
>>>     						      &position);
>>> +			mutex_unlock(&adev->dm.dc_lock);
>>> +		}
>>>     		return;
>>>     	}
>>>     
>>> @@ -4351,6 +4396,7 @@ static void handle_cursor_update(struct drm_plane *plane,
>>>     	attributes.pitch = attributes.width;
>>>     
>>>     	if (crtc_state->stream) {
>>> +		mutex_lock(&adev->dm.dc_lock);
>>>     		if (!dc_stream_set_cursor_attributes(crtc_state->stream,
>>>     							 &attributes))
>>>     			DRM_ERROR("DC failed to set cursor attributes\n");
>>> @@ -4358,6 +4404,7 @@ static void handle_cursor_update(struct drm_plane *plane,
>>>     		if (!dc_stream_set_cursor_position(crtc_state->stream,
>>>     						   &position))
>>>     			DRM_ERROR("DC failed to set cursor position\n");
>>> +		mutex_unlock(&adev->dm.dc_lock);
>>>     	}
>>>     }
>>>     
>>> @@ -4573,6 +4620,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>>>     				&acrtc_state->stream->vrr_infopacket;
>>>     	}
>>>     
>>> +	mutex_lock(&adev->dm.dc_lock);
>>>     	dc_commit_updates_for_stream(adev->dm.dc,
>>>     					     surface_updates,
>>>     					     1,
>>> @@ -4580,6 +4628,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>>>     					     &stream_update,
>>>     					     &surface_updates->surface,
>>>     					     state);
>>> +	mutex_unlock(&adev->dm.dc_lock);
>>>     
>>>     	DRM_DEBUG_DRIVER("%s Flipping to hi: 0x%x, low: 0x%x \n",
>>>     			 __func__,
>>> @@ -4594,6 +4643,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>>>      * with a dc_plane_state and follow the atomic model a bit more closely here.
>>>      */
>>>     static bool commit_planes_to_stream(
>>> +		struct amdgpu_display_manager *dm,
>>>     		struct dc *dc,
>>>     		struct dc_plane_state **plane_states,
>>>     		uint8_t new_plane_count,
>>> @@ -4670,11 +4720,13 @@ static bool commit_planes_to_stream(
>>>     		updates[i].scaling_info = &scaling_info[i];
>>>     	}
>>>     
>>> +	mutex_lock(&dm->dc_lock);
>>>     	dc_commit_updates_for_stream(
>>>     			dc,
>>>     			updates,
>>>     			new_plane_count,
>>>     			dc_stream, stream_update, plane_states, state);
>>> +	mutex_unlock(&dm->dc_lock);
>>>     
>>>     	kfree(flip_addr);
>>>     	kfree(plane_info);
>>> @@ -4780,7 +4832,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
>>>     
>>>     		dc_stream_attach->abm_level = acrtc_state->abm_level;
>>>     
>>> -		if (false == commit_planes_to_stream(dm->dc,
>>> +		if (false == commit_planes_to_stream(dm,
>>> +							dm->dc,
>>>     							plane_states_constructed,
>>>     							planes_count,
>>>     							acrtc_state,
>>> @@ -4950,7 +5003,9 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
>>>     
>>>     	if (dc_state) {
>>>     		dm_enable_per_frame_crtc_master_sync(dc_state);
>>> +		mutex_lock(&dm->dc_lock);
>>>     		WARN_ON(!dc_commit_state(dm->dc, dc_state));
>>> +		mutex_unlock(&dm->dc_lock);
>>>     	}
>>>     
>>>     	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
>>> @@ -5012,6 +5067,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
>>>     
>>>     		/*TODO How it works with MPO ?*/
>>>     		if (!commit_planes_to_stream(
>>> +				dm,
>>>     				dm->dc,
>>>     				status->plane_states,
>>>     				status->plane_count,
>>> @@ -5904,6 +5960,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
>>>     			ret = -EINVAL;
>>>     			goto fail;
>>>     		}
>>> +	} else if (state->legacy_cursor_update) {
>>> +		/*
>>> +		 * This is a fast cursor update coming from the plane update
>>> +		 * helper, check if it can be done asynchronously for better
>>> +		 * performance.
>>> +		 */
>>> +		state->async_update = !drm_atomic_helper_async_check(dev, state);
>>>     	}
>>>     
>>>     	/* Must be success */
>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
>>> index 4326dc256491..25bb91ee80ba 100644
>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
>>> @@ -134,6 +134,14 @@ struct amdgpu_display_manager {
>>>     
>>>     	struct drm_modeset_lock atomic_obj_lock;
>>>     
>>> +	/**
>>> +	 * @dc_lock:
>>> +	 *
>>> +	 * Guards access to DC functions that can issue register write
>>> +	 * sequences.
>>> +	 */
>>> +	struct mutex dc_lock;
>>> +
>>>     	/**
>>>     	 * @irq_handler_list_low_tab:
>>>     	 *

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] drm/amd/display: Add fast path for cursor plane updates
       [not found]             ` <73996dfc-1cd6-e707-9e2b-48a5c89d6cac-5C7GfCeVMHo@public.gmane.org>
@ 2018-12-05 21:01               ` Nicholas Kazlauskas
       [not found]                 ` <5c4c64a1-cc84-e9c2-6275-ca8ac76237b1-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Nicholas Kazlauskas @ 2018-12-05 21:01 UTC (permalink / raw)
  To: Grodzovsky, Andrey, Kazlauskas, Nicholas,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Li, Sun peng (Leo), Wentland, Harry

On 2018-12-05 3:44 p.m., Grodzovsky, Andrey wrote:
> 
> 
> On 12/05/2018 03:42 PM, Kazlauskas, Nicholas wrote:
>> On 2018-12-05 3:26 p.m., Grodzovsky, Andrey wrote:
>>>
>>> On 12/05/2018 02:59 PM, Nicholas Kazlauskas wrote:
>>>> [Why]
>>>> Legacy cursor plane updates from drm helpers go through the full
>>>> atomic codepath. A high volume of cursor updates through this slow
>>>> code path can cause subsequent page-flips to skip vblank intervals
>>>> since each individual update is slow.
>>>>
>>>> This problem is particularly noticeable for the compton compositor.
>>>>
>>>> [How]
>>>> A fast path for cursor plane updates is added by using DRM asynchronous
>>>> commit support provided by async_check and async_update. These don't do
>>>> a full state/flip_done dependency stall and they don't block other
>>>> commit work.
>>>>
>>>> However, DC still expects itself to be single-threaded for anything
>>>> that can issue register writes. Screen corruption or hangs can occur
>>>> if write sequences overlap. Every call that potentially perform
>>>> register writes needs to be guarded for asynchronous updates to work.
>>>> The dc_lock mutex was added for this.
>>> As far as page flip related register writes concerned this I think this
>>> was never an issue - we always
>>> supported fully concurrent page flips on different CRTCs meaning writing
>>> surface address concurrently
>>> on different pipes. So Are you sure you  can't do cursor update
>>> concurrently against at least page flips ?
>>> I am not sure about full updates.
>>>
>>> Andrey
>> I think this was true before we started locking the pipes around cursor
>> updates (and probably only for dce). The problem that occur here is
>> writing to the lock register from multiple threads at the same time.
>>
>> In general I think the "contract" between dm/dc now is that anything
>> from dc that does register write sequences can't be called from multiple
>> threads.
>>
>> Nicholas Kazlauskas
> 
> Ok, do note that you also serialize all the page flips now which looks
> unneeded - maybe consider r/w lock to at least avoid that.
> 
> Andrey

Yeah, they definitely shouldn't be happening at the same time as the 
cursor calls.

I'd need to look into whether it's okay now to do concurrent writes from 
the stream update functions though, but I'd imagine it's not something 
we'd want to be doing. A r/w lock would work if we could though.

Nicholas Kazlauskas

> 
>>
>>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175
>>>>
>>>> Cc: Leo Li <sunpeng.li@amd.com>
>>>> Cc: Harry Wentland <harry.wentland@amd.com>
>>>> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
>>>> ---
>>>>      .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 67 ++++++++++++++++++-
>>>>      .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  8 +++
>>>>      2 files changed, 73 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>> index 23d61570df17..4df14a50a8ac 100644
>>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>> @@ -57,6 +57,7 @@
>>>>      
>>>>      #include <drm/drmP.h>
>>>>      #include <drm/drm_atomic.h>
>>>> +#include <drm/drm_atomic_uapi.h>
>>>>      #include <drm/drm_atomic_helper.h>
>>>>      #include <drm/drm_dp_mst_helper.h>
>>>>      #include <drm/drm_fb_helper.h>
>>>> @@ -133,6 +134,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state);
>>>>      static int amdgpu_dm_atomic_check(struct drm_device *dev,
>>>>      				  struct drm_atomic_state *state);
>>>>      
>>>> +static void handle_cursor_update(struct drm_plane *plane,
>>>> +				 struct drm_plane_state *old_plane_state);
>>>>      
>>>>      
>>>>      
>>>> @@ -402,6 +405,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
>>>>      	/* Zero all the fields */
>>>>      	memset(&init_data, 0, sizeof(init_data));
>>>>      
>>>> +	mutex_init(&adev->dm.dc_lock);
>>>> +
>>>>      	if(amdgpu_dm_irq_init(adev)) {
>>>>      		DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n");
>>>>      		goto error;
>>>> @@ -516,6 +521,9 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
>>>>      	/* DC Destroy TODO: Replace destroy DAL */
>>>>      	if (adev->dm.dc)
>>>>      		dc_destroy(&adev->dm.dc);
>>>> +
>>>> +	mutex_destroy(&adev->dm.dc_lock);
>>>> +
>>>>      	return;
>>>>      }
>>>>      
>>>> @@ -3615,10 +3623,43 @@ static int dm_plane_atomic_check(struct drm_plane *plane,
>>>>      	return -EINVAL;
>>>>      }
>>>>      
>>>> +static int dm_plane_atomic_async_check(struct drm_plane *plane,
>>>> +				       struct drm_plane_state *new_plane_state)
>>>> +{
>>>> +	/* Only support async updates on cursor planes. */
>>>> +	if (plane->type != DRM_PLANE_TYPE_CURSOR)
>>>> +		return -EINVAL;
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +static void dm_plane_atomic_async_update(struct drm_plane *plane,
>>>> +					 struct drm_plane_state *new_state)
>>>> +{
>>>> +	struct drm_plane_state *old_state =
>>>> +		drm_atomic_get_old_plane_state(new_state->state, plane);
>>>> +
>>>> +	if (plane->state->fb != new_state->fb)
>>>> +		drm_atomic_set_fb_for_plane(plane->state, new_state->fb);
>>>> +
>>>> +	plane->state->src_x = new_state->src_x;
>>>> +	plane->state->src_y = new_state->src_y;
>>>> +	plane->state->src_w = new_state->src_w;
>>>> +	plane->state->src_h = new_state->src_h;
>>>> +	plane->state->crtc_x = new_state->crtc_x;
>>>> +	plane->state->crtc_y = new_state->crtc_y;
>>>> +	plane->state->crtc_w = new_state->crtc_w;
>>>> +	plane->state->crtc_h = new_state->crtc_h;
>>>> +
>>>> +	handle_cursor_update(plane, old_state);
>>>> +}
>>>> +
>>>>      static const struct drm_plane_helper_funcs dm_plane_helper_funcs = {
>>>>      	.prepare_fb = dm_plane_helper_prepare_fb,
>>>>      	.cleanup_fb = dm_plane_helper_cleanup_fb,
>>>>      	.atomic_check = dm_plane_atomic_check,
>>>> +	.atomic_async_check = dm_plane_atomic_async_check,
>>>> +	.atomic_async_update = dm_plane_atomic_async_update
>>>>      };
>>>>      
>>>>      /*
>>>> @@ -4307,6 +4348,7 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,
>>>>      static void handle_cursor_update(struct drm_plane *plane,
>>>>      				 struct drm_plane_state *old_plane_state)
>>>>      {
>>>> +	struct amdgpu_device *adev = plane->dev->dev_private;
>>>>      	struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb);
>>>>      	struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc;
>>>>      	struct dm_crtc_state *crtc_state = crtc ? to_dm_crtc_state(crtc->state) : NULL;
>>>> @@ -4331,9 +4373,12 @@ static void handle_cursor_update(struct drm_plane *plane,
>>>>      
>>>>      	if (!position.enable) {
>>>>      		/* turn off cursor */
>>>> -		if (crtc_state && crtc_state->stream)
>>>> +		if (crtc_state && crtc_state->stream) {
>>>> +			mutex_lock(&adev->dm.dc_lock);
>>>>      			dc_stream_set_cursor_position(crtc_state->stream,
>>>>      						      &position);
>>>> +			mutex_unlock(&adev->dm.dc_lock);
>>>> +		}
>>>>      		return;
>>>>      	}
>>>>      
>>>> @@ -4351,6 +4396,7 @@ static void handle_cursor_update(struct drm_plane *plane,
>>>>      	attributes.pitch = attributes.width;
>>>>      
>>>>      	if (crtc_state->stream) {
>>>> +		mutex_lock(&adev->dm.dc_lock);
>>>>      		if (!dc_stream_set_cursor_attributes(crtc_state->stream,
>>>>      							 &attributes))
>>>>      			DRM_ERROR("DC failed to set cursor attributes\n");
>>>> @@ -4358,6 +4404,7 @@ static void handle_cursor_update(struct drm_plane *plane,
>>>>      		if (!dc_stream_set_cursor_position(crtc_state->stream,
>>>>      						   &position))
>>>>      			DRM_ERROR("DC failed to set cursor position\n");
>>>> +		mutex_unlock(&adev->dm.dc_lock);
>>>>      	}
>>>>      }
>>>>      
>>>> @@ -4573,6 +4620,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>>>>      				&acrtc_state->stream->vrr_infopacket;
>>>>      	}
>>>>      
>>>> +	mutex_lock(&adev->dm.dc_lock);
>>>>      	dc_commit_updates_for_stream(adev->dm.dc,
>>>>      					     surface_updates,
>>>>      					     1,
>>>> @@ -4580,6 +4628,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>>>>      					     &stream_update,
>>>>      					     &surface_updates->surface,
>>>>      					     state);
>>>> +	mutex_unlock(&adev->dm.dc_lock);
>>>>      
>>>>      	DRM_DEBUG_DRIVER("%s Flipping to hi: 0x%x, low: 0x%x \n",
>>>>      			 __func__,
>>>> @@ -4594,6 +4643,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>>>>       * with a dc_plane_state and follow the atomic model a bit more closely here.
>>>>       */
>>>>      static bool commit_planes_to_stream(
>>>> +		struct amdgpu_display_manager *dm,
>>>>      		struct dc *dc,
>>>>      		struct dc_plane_state **plane_states,
>>>>      		uint8_t new_plane_count,
>>>> @@ -4670,11 +4720,13 @@ static bool commit_planes_to_stream(
>>>>      		updates[i].scaling_info = &scaling_info[i];
>>>>      	}
>>>>      
>>>> +	mutex_lock(&dm->dc_lock);
>>>>      	dc_commit_updates_for_stream(
>>>>      			dc,
>>>>      			updates,
>>>>      			new_plane_count,
>>>>      			dc_stream, stream_update, plane_states, state);
>>>> +	mutex_unlock(&dm->dc_lock);
>>>>      
>>>>      	kfree(flip_addr);
>>>>      	kfree(plane_info);
>>>> @@ -4780,7 +4832,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
>>>>      
>>>>      		dc_stream_attach->abm_level = acrtc_state->abm_level;
>>>>      
>>>> -		if (false == commit_planes_to_stream(dm->dc,
>>>> +		if (false == commit_planes_to_stream(dm,
>>>> +							dm->dc,
>>>>      							plane_states_constructed,
>>>>      							planes_count,
>>>>      							acrtc_state,
>>>> @@ -4950,7 +5003,9 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
>>>>      
>>>>      	if (dc_state) {
>>>>      		dm_enable_per_frame_crtc_master_sync(dc_state);
>>>> +		mutex_lock(&dm->dc_lock);
>>>>      		WARN_ON(!dc_commit_state(dm->dc, dc_state));
>>>> +		mutex_unlock(&dm->dc_lock);
>>>>      	}
>>>>      
>>>>      	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
>>>> @@ -5012,6 +5067,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
>>>>      
>>>>      		/*TODO How it works with MPO ?*/
>>>>      		if (!commit_planes_to_stream(
>>>> +				dm,
>>>>      				dm->dc,
>>>>      				status->plane_states,
>>>>      				status->plane_count,
>>>> @@ -5904,6 +5960,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
>>>>      			ret = -EINVAL;
>>>>      			goto fail;
>>>>      		}
>>>> +	} else if (state->legacy_cursor_update) {
>>>> +		/*
>>>> +		 * This is a fast cursor update coming from the plane update
>>>> +		 * helper, check if it can be done asynchronously for better
>>>> +		 * performance.
>>>> +		 */
>>>> +		state->async_update = !drm_atomic_helper_async_check(dev, state);
>>>>      	}
>>>>      
>>>>      	/* Must be success */
>>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
>>>> index 4326dc256491..25bb91ee80ba 100644
>>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
>>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
>>>> @@ -134,6 +134,14 @@ struct amdgpu_display_manager {
>>>>      
>>>>      	struct drm_modeset_lock atomic_obj_lock;
>>>>      
>>>> +	/**
>>>> +	 * @dc_lock:
>>>> +	 *
>>>> +	 * Guards access to DC functions that can issue register write
>>>> +	 * sequences.
>>>> +	 */
>>>> +	struct mutex dc_lock;
>>>> +
>>>>      	/**
>>>>      	 * @irq_handler_list_low_tab:
>>>>      	 *
> 

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] drm/amd/display: Add fast path for cursor plane updates
       [not found]                 ` <5c4c64a1-cc84-e9c2-6275-ca8ac76237b1-5C7GfCeVMHo@public.gmane.org>
@ 2018-12-06 13:42                   ` Kazlauskas, Nicholas
       [not found]                     ` <a7bad892-8215-16a8-65a6-328533395d03-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Kazlauskas, Nicholas @ 2018-12-06 13:42 UTC (permalink / raw)
  To: Grodzovsky, Andrey, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Li, Sun peng (Leo), Wentland, Harry

On 12/5/18 4:01 PM, Kazlauskas, Nicholas wrote:
> On 2018-12-05 3:44 p.m., Grodzovsky, Andrey wrote:
>>
>>
>> On 12/05/2018 03:42 PM, Kazlauskas, Nicholas wrote:
>>> On 2018-12-05 3:26 p.m., Grodzovsky, Andrey wrote:
>>>>
>>>> On 12/05/2018 02:59 PM, Nicholas Kazlauskas wrote:
>>>>> [Why]
>>>>> Legacy cursor plane updates from drm helpers go through the full
>>>>> atomic codepath. A high volume of cursor updates through this slow
>>>>> code path can cause subsequent page-flips to skip vblank intervals
>>>>> since each individual update is slow.
>>>>>
>>>>> This problem is particularly noticeable for the compton compositor.
>>>>>
>>>>> [How]
>>>>> A fast path for cursor plane updates is added by using DRM asynchronous
>>>>> commit support provided by async_check and async_update. These don't do
>>>>> a full state/flip_done dependency stall and they don't block other
>>>>> commit work.
>>>>>
>>>>> However, DC still expects itself to be single-threaded for anything
>>>>> that can issue register writes. Screen corruption or hangs can occur
>>>>> if write sequences overlap. Every call that potentially perform
>>>>> register writes needs to be guarded for asynchronous updates to work.
>>>>> The dc_lock mutex was added for this.
>>>> As far as page flip related register writes concerned this I think this
>>>> was never an issue - we always
>>>> supported fully concurrent page flips on different CRTCs meaning writing
>>>> surface address concurrently
>>>> on different pipes. So Are you sure you  can't do cursor update
>>>> concurrently against at least page flips ?
>>>> I am not sure about full updates.
>>>>
>>>> Andrey
>>> I think this was true before we started locking the pipes around cursor
>>> updates (and probably only for dce). The problem that occur here is
>>> writing to the lock register from multiple threads at the same time.
>>>
>>> In general I think the "contract" between dm/dc now is that anything
>>> from dc that does register write sequences can't be called from multiple
>>> threads.
>>>
>>> Nicholas Kazlauskas
>>
>> Ok, do note that you also serialize all the page flips now which looks
>> unneeded - maybe consider r/w lock to at least avoid that.
>>
>> Andrey
> 
> Yeah, they definitely shouldn't be happening at the same time as the
> cursor calls.
> 
> I'd need to look into whether it's okay now to do concurrent writes from
> the stream update functions though, but I'd imagine it's not something
> we'd want to be doing. A r/w lock would work if we could though.
> 
> Nicholas Kazlauskas

A reader/writer lock wouldn't help here, actually.

Fast updates (and page flips in particular) need to lock the pipe as 
well, see: commit_planes_for_stream.

Nicholas Kazlauskas

> 
>>
>>>
>>>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175
>>>>>
>>>>> Cc: Leo Li <sunpeng.li@amd.com>
>>>>> Cc: Harry Wentland <harry.wentland@amd.com>
>>>>> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
>>>>> ---
>>>>>       .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 67 ++++++++++++++++++-
>>>>>       .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  8 +++
>>>>>       2 files changed, 73 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>>> index 23d61570df17..4df14a50a8ac 100644
>>>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>>> @@ -57,6 +57,7 @@
>>>>>       
>>>>>       #include <drm/drmP.h>
>>>>>       #include <drm/drm_atomic.h>
>>>>> +#include <drm/drm_atomic_uapi.h>
>>>>>       #include <drm/drm_atomic_helper.h>
>>>>>       #include <drm/drm_dp_mst_helper.h>
>>>>>       #include <drm/drm_fb_helper.h>
>>>>> @@ -133,6 +134,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state);
>>>>>       static int amdgpu_dm_atomic_check(struct drm_device *dev,
>>>>>       				  struct drm_atomic_state *state);
>>>>>       
>>>>> +static void handle_cursor_update(struct drm_plane *plane,
>>>>> +				 struct drm_plane_state *old_plane_state);
>>>>>       
>>>>>       
>>>>>       
>>>>> @@ -402,6 +405,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
>>>>>       	/* Zero all the fields */
>>>>>       	memset(&init_data, 0, sizeof(init_data));
>>>>>       
>>>>> +	mutex_init(&adev->dm.dc_lock);
>>>>> +
>>>>>       	if(amdgpu_dm_irq_init(adev)) {
>>>>>       		DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n");
>>>>>       		goto error;
>>>>> @@ -516,6 +521,9 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
>>>>>       	/* DC Destroy TODO: Replace destroy DAL */
>>>>>       	if (adev->dm.dc)
>>>>>       		dc_destroy(&adev->dm.dc);
>>>>> +
>>>>> +	mutex_destroy(&adev->dm.dc_lock);
>>>>> +
>>>>>       	return;
>>>>>       }
>>>>>       
>>>>> @@ -3615,10 +3623,43 @@ static int dm_plane_atomic_check(struct drm_plane *plane,
>>>>>       	return -EINVAL;
>>>>>       }
>>>>>       
>>>>> +static int dm_plane_atomic_async_check(struct drm_plane *plane,
>>>>> +				       struct drm_plane_state *new_plane_state)
>>>>> +{
>>>>> +	/* Only support async updates on cursor planes. */
>>>>> +	if (plane->type != DRM_PLANE_TYPE_CURSOR)
>>>>> +		return -EINVAL;
>>>>> +
>>>>> +	return 0;
>>>>> +}
>>>>> +
>>>>> +static void dm_plane_atomic_async_update(struct drm_plane *plane,
>>>>> +					 struct drm_plane_state *new_state)
>>>>> +{
>>>>> +	struct drm_plane_state *old_state =
>>>>> +		drm_atomic_get_old_plane_state(new_state->state, plane);
>>>>> +
>>>>> +	if (plane->state->fb != new_state->fb)
>>>>> +		drm_atomic_set_fb_for_plane(plane->state, new_state->fb);
>>>>> +
>>>>> +	plane->state->src_x = new_state->src_x;
>>>>> +	plane->state->src_y = new_state->src_y;
>>>>> +	plane->state->src_w = new_state->src_w;
>>>>> +	plane->state->src_h = new_state->src_h;
>>>>> +	plane->state->crtc_x = new_state->crtc_x;
>>>>> +	plane->state->crtc_y = new_state->crtc_y;
>>>>> +	plane->state->crtc_w = new_state->crtc_w;
>>>>> +	plane->state->crtc_h = new_state->crtc_h;
>>>>> +
>>>>> +	handle_cursor_update(plane, old_state);
>>>>> +}
>>>>> +
>>>>>       static const struct drm_plane_helper_funcs dm_plane_helper_funcs = {
>>>>>       	.prepare_fb = dm_plane_helper_prepare_fb,
>>>>>       	.cleanup_fb = dm_plane_helper_cleanup_fb,
>>>>>       	.atomic_check = dm_plane_atomic_check,
>>>>> +	.atomic_async_check = dm_plane_atomic_async_check,
>>>>> +	.atomic_async_update = dm_plane_atomic_async_update
>>>>>       };
>>>>>       
>>>>>       /*
>>>>> @@ -4307,6 +4348,7 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,
>>>>>       static void handle_cursor_update(struct drm_plane *plane,
>>>>>       				 struct drm_plane_state *old_plane_state)
>>>>>       {
>>>>> +	struct amdgpu_device *adev = plane->dev->dev_private;
>>>>>       	struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb);
>>>>>       	struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc;
>>>>>       	struct dm_crtc_state *crtc_state = crtc ? to_dm_crtc_state(crtc->state) : NULL;
>>>>> @@ -4331,9 +4373,12 @@ static void handle_cursor_update(struct drm_plane *plane,
>>>>>       
>>>>>       	if (!position.enable) {
>>>>>       		/* turn off cursor */
>>>>> -		if (crtc_state && crtc_state->stream)
>>>>> +		if (crtc_state && crtc_state->stream) {
>>>>> +			mutex_lock(&adev->dm.dc_lock);
>>>>>       			dc_stream_set_cursor_position(crtc_state->stream,
>>>>>       						      &position);
>>>>> +			mutex_unlock(&adev->dm.dc_lock);
>>>>> +		}
>>>>>       		return;
>>>>>       	}
>>>>>       
>>>>> @@ -4351,6 +4396,7 @@ static void handle_cursor_update(struct drm_plane *plane,
>>>>>       	attributes.pitch = attributes.width;
>>>>>       
>>>>>       	if (crtc_state->stream) {
>>>>> +		mutex_lock(&adev->dm.dc_lock);
>>>>>       		if (!dc_stream_set_cursor_attributes(crtc_state->stream,
>>>>>       							 &attributes))
>>>>>       			DRM_ERROR("DC failed to set cursor attributes\n");
>>>>> @@ -4358,6 +4404,7 @@ static void handle_cursor_update(struct drm_plane *plane,
>>>>>       		if (!dc_stream_set_cursor_position(crtc_state->stream,
>>>>>       						   &position))
>>>>>       			DRM_ERROR("DC failed to set cursor position\n");
>>>>> +		mutex_unlock(&adev->dm.dc_lock);
>>>>>       	}
>>>>>       }
>>>>>       
>>>>> @@ -4573,6 +4620,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>>>>>       				&acrtc_state->stream->vrr_infopacket;
>>>>>       	}
>>>>>       
>>>>> +	mutex_lock(&adev->dm.dc_lock);
>>>>>       	dc_commit_updates_for_stream(adev->dm.dc,
>>>>>       					     surface_updates,
>>>>>       					     1,
>>>>> @@ -4580,6 +4628,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>>>>>       					     &stream_update,
>>>>>       					     &surface_updates->surface,
>>>>>       					     state);
>>>>> +	mutex_unlock(&adev->dm.dc_lock);
>>>>>       
>>>>>       	DRM_DEBUG_DRIVER("%s Flipping to hi: 0x%x, low: 0x%x \n",
>>>>>       			 __func__,
>>>>> @@ -4594,6 +4643,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>>>>>        * with a dc_plane_state and follow the atomic model a bit more closely here.
>>>>>        */
>>>>>       static bool commit_planes_to_stream(
>>>>> +		struct amdgpu_display_manager *dm,
>>>>>       		struct dc *dc,
>>>>>       		struct dc_plane_state **plane_states,
>>>>>       		uint8_t new_plane_count,
>>>>> @@ -4670,11 +4720,13 @@ static bool commit_planes_to_stream(
>>>>>       		updates[i].scaling_info = &scaling_info[i];
>>>>>       	}
>>>>>       
>>>>> +	mutex_lock(&dm->dc_lock);
>>>>>       	dc_commit_updates_for_stream(
>>>>>       			dc,
>>>>>       			updates,
>>>>>       			new_plane_count,
>>>>>       			dc_stream, stream_update, plane_states, state);
>>>>> +	mutex_unlock(&dm->dc_lock);
>>>>>       
>>>>>       	kfree(flip_addr);
>>>>>       	kfree(plane_info);
>>>>> @@ -4780,7 +4832,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
>>>>>       
>>>>>       		dc_stream_attach->abm_level = acrtc_state->abm_level;
>>>>>       
>>>>> -		if (false == commit_planes_to_stream(dm->dc,
>>>>> +		if (false == commit_planes_to_stream(dm,
>>>>> +							dm->dc,
>>>>>       							plane_states_constructed,
>>>>>       							planes_count,
>>>>>       							acrtc_state,
>>>>> @@ -4950,7 +5003,9 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
>>>>>       
>>>>>       	if (dc_state) {
>>>>>       		dm_enable_per_frame_crtc_master_sync(dc_state);
>>>>> +		mutex_lock(&dm->dc_lock);
>>>>>       		WARN_ON(!dc_commit_state(dm->dc, dc_state));
>>>>> +		mutex_unlock(&dm->dc_lock);
>>>>>       	}
>>>>>       
>>>>>       	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
>>>>> @@ -5012,6 +5067,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
>>>>>       
>>>>>       		/*TODO How it works with MPO ?*/
>>>>>       		if (!commit_planes_to_stream(
>>>>> +				dm,
>>>>>       				dm->dc,
>>>>>       				status->plane_states,
>>>>>       				status->plane_count,
>>>>> @@ -5904,6 +5960,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
>>>>>       			ret = -EINVAL;
>>>>>       			goto fail;
>>>>>       		}
>>>>> +	} else if (state->legacy_cursor_update) {
>>>>> +		/*
>>>>> +		 * This is a fast cursor update coming from the plane update
>>>>> +		 * helper, check if it can be done asynchronously for better
>>>>> +		 * performance.
>>>>> +		 */
>>>>> +		state->async_update = !drm_atomic_helper_async_check(dev, state);
>>>>>       	}
>>>>>       
>>>>>       	/* Must be success */
>>>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
>>>>> index 4326dc256491..25bb91ee80ba 100644
>>>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
>>>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
>>>>> @@ -134,6 +134,14 @@ struct amdgpu_display_manager {
>>>>>       
>>>>>       	struct drm_modeset_lock atomic_obj_lock;
>>>>>       
>>>>> +	/**
>>>>> +	 * @dc_lock:
>>>>> +	 *
>>>>> +	 * Guards access to DC functions that can issue register write
>>>>> +	 * sequences.
>>>>> +	 */
>>>>> +	struct mutex dc_lock;
>>>>> +
>>>>>       	/**
>>>>>       	 * @irq_handler_list_low_tab:
>>>>>       	 *
>>
> 

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] drm/amd/display: Add fast path for cursor plane updates
       [not found]                     ` <a7bad892-8215-16a8-65a6-328533395d03-5C7GfCeVMHo@public.gmane.org>
@ 2018-12-06 15:36                       ` Grodzovsky, Andrey
       [not found]                         ` <13f4b0bc-fcda-be57-2416-aa3ba5ff065c-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Grodzovsky, Andrey @ 2018-12-06 15:36 UTC (permalink / raw)
  To: Kazlauskas, Nicholas, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Li, Sun peng (Leo), Wentland, Harry

Not an expert on Freesync so maybe stupid question but from he comment 
looks like this pipe locking is only for the sake of Freesync mode there 
- why is it then called unconditionally w/o checking if you even run in 
Freesync mode ?

Andrey


On 12/06/2018 08:42 AM, Kazlauskas, Nicholas wrote:
> A reader/writer lock wouldn't help here, actually.
>
> Fast updates (and page flips in particular) need to lock the pipe as
> well, see: commit_planes_for_stream.
>
> Nicholas Kazlauskas

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] drm/amd/display: Add fast path for cursor plane updates
       [not found]                         ` <13f4b0bc-fcda-be57-2416-aa3ba5ff065c-5C7GfCeVMHo@public.gmane.org>
@ 2018-12-06 15:59                           ` Nicholas Kazlauskas
       [not found]                             ` <78e6ff7a-c7f4-324b-7fb2-9df282cb2c7e-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Nicholas Kazlauskas @ 2018-12-06 15:59 UTC (permalink / raw)
  To: Grodzovsky, Andrey, Kazlauskas, Nicholas,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Li, Sun peng (Leo), Wentland, Harry

On 2018-12-06 10:36 a.m., Grodzovsky, Andrey wrote:
> Not an expert on Freesync so maybe stupid question but from he comment
> looks like this pipe locking is only for the sake of Freesync mode there
> - why is it then called unconditionally w/o checking if you even run in
> Freesync mode ?
> 
> Andrey
I don't think there's any sort of state tracking done in DC for FreeSync 
state changes on a stream so it was probably easier to do this. Given 
the age of the pipe lock addition I would be worried about other 
behavior relying on it now if it was just conditionally done on that.

In practice I think the worst case effect of guarding these calls with a 
mutex is <100us of delay. That's for two separate threads issuing two 
commits at the same time with the planes changed.

Nicholas Kazlauskas

> 
> 
> On 12/06/2018 08:42 AM, Kazlauskas, Nicholas wrote:
>> A reader/writer lock wouldn't help here, actually.
>>
>> Fast updates (and page flips in particular) need to lock the pipe as
>> well, see: commit_planes_for_stream.
>>
>> Nicholas Kazlauskas
> 
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] drm/amd/display: Add fast path for cursor plane updates
       [not found]                             ` <78e6ff7a-c7f4-324b-7fb2-9df282cb2c7e-5C7GfCeVMHo@public.gmane.org>
@ 2018-12-06 16:12                               ` Grodzovsky, Andrey
  0 siblings, 0 replies; 16+ messages in thread
From: Grodzovsky, Andrey @ 2018-12-06 16:12 UTC (permalink / raw)
  To: Kazlauskas, Nicholas; +Cc: Li, Sun peng (Leo), Wentland, Harry

Ok - the change is Acked-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>

Andrey


On 12/06/2018 10:59 AM, Nicholas Kazlauskas wrote:
> On 2018-12-06 10:36 a.m., Grodzovsky, Andrey wrote:
>> Not an expert on Freesync so maybe stupid question but from he comment
>> looks like this pipe locking is only for the sake of Freesync mode there
>> - why is it then called unconditionally w/o checking if you even run in
>> Freesync mode ?
>>
>> Andrey
> I don't think there's any sort of state tracking done in DC for FreeSync
> state changes on a stream so it was probably easier to do this. Given
> the age of the pipe lock addition I would be worried about other
> behavior relying on it now if it was just conditionally done on that.
>
> In practice I think the worst case effect of guarding these calls with a
> mutex is <100us of delay. That's for two separate threads issuing two
> commits at the same time with the planes changed.
>
> Nicholas Kazlauskas
>
>>
>> On 12/06/2018 08:42 AM, Kazlauskas, Nicholas wrote:
>>> A reader/writer lock wouldn't help here, actually.
>>>
>>> Fast updates (and page flips in particular) need to lock the pipe as
>>> well, see: commit_planes_for_stream.
>>>
>>> Nicholas Kazlauskas
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] drm/amd/display: Add fast path for cursor plane updates
       [not found] ` <20181205195907.9501-1-nicholas.kazlauskas-5C7GfCeVMHo@public.gmane.org>
  2018-12-05 20:26   ` Grodzovsky, Andrey
@ 2018-12-11 22:30   ` Li, Sun peng (Leo)
  2018-12-13 15:48   ` Michel Dänzer
  2 siblings, 0 replies; 16+ messages in thread
From: Li, Sun peng (Leo) @ 2018-12-11 22:30 UTC (permalink / raw)
  To: Kazlauskas, Nicholas, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Wentland, Harry



On 2018-12-05 2:59 p.m., Nicholas Kazlauskas wrote:
> [Why]
> Legacy cursor plane updates from drm helpers go through the full
> atomic codepath. A high volume of cursor updates through this slow
> code path can cause subsequent page-flips to skip vblank intervals
> since each individual update is slow.
> 
> This problem is particularly noticeable for the compton compositor.
> 
> [How]
> A fast path for cursor plane updates is added by using DRM asynchronous
> commit support provided by async_check and async_update. These don't do
> a full state/flip_done dependency stall and they don't block other
> commit work.
> 
> However, DC still expects itself to be single-threaded for anything
> that can issue register writes. Screen corruption or hangs can occur
> if write sequences overlap. Every call that potentially perform
> register writes needs to be guarded for asynchronous updates to work.
> The dc_lock mutex was added for this.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175
> 
> Cc: Leo Li <sunpeng.li@amd.com>
> Cc: Harry Wentland <harry.wentland@amd.com>
> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>

I just skimmed through the calls to DC, but I trust you've locked on
everything that can potentially modify registers.

Might be a good future task to document DC interfaces that are not
thread safe, since it's currently not clear. Although this change does
shine light on that :)

Thanks,

Reviewed-by Leo Li <sunpeng.li@amd.com>

> ---
>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 67 ++++++++++++++++++-
>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  8 +++
>   2 files changed, 73 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 23d61570df17..4df14a50a8ac 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -57,6 +57,7 @@
>   
>   #include <drm/drmP.h>
>   #include <drm/drm_atomic.h>
> +#include <drm/drm_atomic_uapi.h>
>   #include <drm/drm_atomic_helper.h>
>   #include <drm/drm_dp_mst_helper.h>
>   #include <drm/drm_fb_helper.h>
> @@ -133,6 +134,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state);
>   static int amdgpu_dm_atomic_check(struct drm_device *dev,
>   				  struct drm_atomic_state *state);
>   
> +static void handle_cursor_update(struct drm_plane *plane,
> +				 struct drm_plane_state *old_plane_state);
>   
>   
>   
> @@ -402,6 +405,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
>   	/* Zero all the fields */
>   	memset(&init_data, 0, sizeof(init_data));
>   
> +	mutex_init(&adev->dm.dc_lock);
> +
>   	if(amdgpu_dm_irq_init(adev)) {
>   		DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n");
>   		goto error;
> @@ -516,6 +521,9 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
>   	/* DC Destroy TODO: Replace destroy DAL */
>   	if (adev->dm.dc)
>   		dc_destroy(&adev->dm.dc);
> +
> +	mutex_destroy(&adev->dm.dc_lock);
> +
>   	return;
>   }
>   
> @@ -3615,10 +3623,43 @@ static int dm_plane_atomic_check(struct drm_plane *plane,
>   	return -EINVAL;
>   }
>   
> +static int dm_plane_atomic_async_check(struct drm_plane *plane,
> +				       struct drm_plane_state *new_plane_state)
> +{
> +	/* Only support async updates on cursor planes. */
> +	if (plane->type != DRM_PLANE_TYPE_CURSOR)
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
> +static void dm_plane_atomic_async_update(struct drm_plane *plane,
> +					 struct drm_plane_state *new_state)
> +{
> +	struct drm_plane_state *old_state =
> +		drm_atomic_get_old_plane_state(new_state->state, plane);
> +
> +	if (plane->state->fb != new_state->fb)
> +		drm_atomic_set_fb_for_plane(plane->state, new_state->fb);
> +
> +	plane->state->src_x = new_state->src_x;
> +	plane->state->src_y = new_state->src_y;
> +	plane->state->src_w = new_state->src_w;
> +	plane->state->src_h = new_state->src_h;
> +	plane->state->crtc_x = new_state->crtc_x;
> +	plane->state->crtc_y = new_state->crtc_y;
> +	plane->state->crtc_w = new_state->crtc_w;
> +	plane->state->crtc_h = new_state->crtc_h;
> +
> +	handle_cursor_update(plane, old_state);
> +}
> +
>   static const struct drm_plane_helper_funcs dm_plane_helper_funcs = {
>   	.prepare_fb = dm_plane_helper_prepare_fb,
>   	.cleanup_fb = dm_plane_helper_cleanup_fb,
>   	.atomic_check = dm_plane_atomic_check,
> +	.atomic_async_check = dm_plane_atomic_async_check,
> +	.atomic_async_update = dm_plane_atomic_async_update
>   };
>   
>   /*
> @@ -4307,6 +4348,7 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,
>   static void handle_cursor_update(struct drm_plane *plane,
>   				 struct drm_plane_state *old_plane_state)
>   {
> +	struct amdgpu_device *adev = plane->dev->dev_private;
>   	struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb);
>   	struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc;
>   	struct dm_crtc_state *crtc_state = crtc ? to_dm_crtc_state(crtc->state) : NULL;
> @@ -4331,9 +4373,12 @@ static void handle_cursor_update(struct drm_plane *plane,
>   
>   	if (!position.enable) {
>   		/* turn off cursor */
> -		if (crtc_state && crtc_state->stream)
> +		if (crtc_state && crtc_state->stream) {
> +			mutex_lock(&adev->dm.dc_lock);
>   			dc_stream_set_cursor_position(crtc_state->stream,
>   						      &position);
> +			mutex_unlock(&adev->dm.dc_lock);
> +		}
>   		return;
>   	}
>   
> @@ -4351,6 +4396,7 @@ static void handle_cursor_update(struct drm_plane *plane,
>   	attributes.pitch = attributes.width;
>   
>   	if (crtc_state->stream) {
> +		mutex_lock(&adev->dm.dc_lock);
>   		if (!dc_stream_set_cursor_attributes(crtc_state->stream,
>   							 &attributes))
>   			DRM_ERROR("DC failed to set cursor attributes\n");
> @@ -4358,6 +4404,7 @@ static void handle_cursor_update(struct drm_plane *plane,
>   		if (!dc_stream_set_cursor_position(crtc_state->stream,
>   						   &position))
>   			DRM_ERROR("DC failed to set cursor position\n");
> +		mutex_unlock(&adev->dm.dc_lock);
>   	}
>   }
>   
> @@ -4573,6 +4620,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>   				&acrtc_state->stream->vrr_infopacket;
>   	}
>   
> +	mutex_lock(&adev->dm.dc_lock);
>   	dc_commit_updates_for_stream(adev->dm.dc,
>   					     surface_updates,
>   					     1,
> @@ -4580,6 +4628,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>   					     &stream_update,
>   					     &surface_updates->surface,
>   					     state);
> +	mutex_unlock(&adev->dm.dc_lock);
>   
>   	DRM_DEBUG_DRIVER("%s Flipping to hi: 0x%x, low: 0x%x \n",
>   			 __func__,
> @@ -4594,6 +4643,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>    * with a dc_plane_state and follow the atomic model a bit more closely here.
>    */
>   static bool commit_planes_to_stream(
> +		struct amdgpu_display_manager *dm,
>   		struct dc *dc,
>   		struct dc_plane_state **plane_states,
>   		uint8_t new_plane_count,
> @@ -4670,11 +4720,13 @@ static bool commit_planes_to_stream(
>   		updates[i].scaling_info = &scaling_info[i];
>   	}
>   
> +	mutex_lock(&dm->dc_lock);
>   	dc_commit_updates_for_stream(
>   			dc,
>   			updates,
>   			new_plane_count,
>   			dc_stream, stream_update, plane_states, state);
> +	mutex_unlock(&dm->dc_lock);
>   
>   	kfree(flip_addr);
>   	kfree(plane_info);
> @@ -4780,7 +4832,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
>   
>   		dc_stream_attach->abm_level = acrtc_state->abm_level;
>   
> -		if (false == commit_planes_to_stream(dm->dc,
> +		if (false == commit_planes_to_stream(dm,
> +							dm->dc,
>   							plane_states_constructed,
>   							planes_count,
>   							acrtc_state,
> @@ -4950,7 +5003,9 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
>   
>   	if (dc_state) {
>   		dm_enable_per_frame_crtc_master_sync(dc_state);
> +		mutex_lock(&dm->dc_lock);
>   		WARN_ON(!dc_commit_state(dm->dc, dc_state));
> +		mutex_unlock(&dm->dc_lock);
>   	}
>   
>   	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
> @@ -5012,6 +5067,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
>   
>   		/*TODO How it works with MPO ?*/
>   		if (!commit_planes_to_stream(
> +				dm,
>   				dm->dc,
>   				status->plane_states,
>   				status->plane_count,
> @@ -5904,6 +5960,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
>   			ret = -EINVAL;
>   			goto fail;
>   		}
> +	} else if (state->legacy_cursor_update) {
> +		/*
> +		 * This is a fast cursor update coming from the plane update
> +		 * helper, check if it can be done asynchronously for better
> +		 * performance.
> +		 */
> +		state->async_update = !drm_atomic_helper_async_check(dev, state);
>   	}
>   
>   	/* Must be success */
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> index 4326dc256491..25bb91ee80ba 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
> @@ -134,6 +134,14 @@ struct amdgpu_display_manager {
>   
>   	struct drm_modeset_lock atomic_obj_lock;
>   
> +	/**
> +	 * @dc_lock:
> +	 *
> +	 * Guards access to DC functions that can issue register write
> +	 * sequences.
> +	 */
> +	struct mutex dc_lock;
> +
>   	/**
>   	 * @irq_handler_list_low_tab:
>   	 *
> 
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] drm/amd/display: Add fast path for cursor plane updates
       [not found] ` <20181205195907.9501-1-nicholas.kazlauskas-5C7GfCeVMHo@public.gmane.org>
  2018-12-05 20:26   ` Grodzovsky, Andrey
  2018-12-11 22:30   ` Li, Sun peng (Leo)
@ 2018-12-13 15:48   ` Michel Dänzer
       [not found]     ` <d9cde6c1-90e1-da3e-7edd-eb3570da666c-otUistvHUpPR7s880joybQ@public.gmane.org>
  2 siblings, 1 reply; 16+ messages in thread
From: Michel Dänzer @ 2018-12-13 15:48 UTC (permalink / raw)
  To: Nicholas Kazlauskas
  Cc: Leo Li, Harry Wentland, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

[-- Attachment #1: Type: text/plain, Size: 1842 bytes --]

On 2018-12-05 8:59 p.m., Nicholas Kazlauskas wrote:
> [Why]
> Legacy cursor plane updates from drm helpers go through the full
> atomic codepath. A high volume of cursor updates through this slow
> code path can cause subsequent page-flips to skip vblank intervals
> since each individual update is slow.
> 
> This problem is particularly noticeable for the compton compositor.
> 
> [How]
> A fast path for cursor plane updates is added by using DRM asynchronous
> commit support provided by async_check and async_update. These don't do
> a full state/flip_done dependency stall and they don't block other
> commit work.
> 
> However, DC still expects itself to be single-threaded for anything
> that can issue register writes. Screen corruption or hangs can occur
> if write sequences overlap. Every call that potentially perform
> register writes needs to be guarded for asynchronous updates to work.
> The dc_lock mutex was added for this.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175
> 
> Cc: Leo Li <sunpeng.li-5C7GfCeVMHo@public.gmane.org>
> Cc: Harry Wentland <harry.wentland-5C7GfCeVMHo@public.gmane.org>
> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas-5C7GfCeVMHo@public.gmane.org>

Looks like this change introduced (or at least exposed) a reference
counting bug resulting in use-after-free when Xorg shuts down[0]. See
the attached dmesg excerpt (note that I wrapped the !bo->pin_count check
in amdgpu_bo_unpin in WARN_ON_ONCE).


[0] Only with
https://gitlab.freedesktop.org/xorg/driver/xf86-video-amdgpu/commit/0d60233d26ec70d4e1faa343b438e33829c6d5e4
, i.e. alternating between two BOs for the HW cursor, instead of always
using the same one.


-- 
Earthling Michel Dänzer               |               http://www.amd.com
Libre software enthusiast             |             Mesa and X developer

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: kern.log --]
[-- Type: text/x-log; name="kern.log", Size: 42344 bytes --]

Dec 13 16:35:07 kaveri kernel: [   52.603334] WARNING: CPU: 13 PID: 2010 at drivers/gpu/drm//amd/amdgpu/amdgpu_object.c:915 amdgpu_bo_unpin+0x24e/0x340 [amdgpu]
Dec 13 16:35:07 kaveri kernel: [   52.603454] Modules linked in: fuse(E) ipt_MASQUERADE(E) nf_conntrack_netlink(E) xfrm_user(E) xfrm_algo(E) nft_counter(E) nft_chain_nat_ipv4(E) nf_nat_ipv4(E) xt_addrtype(E) nft_compat(E) nf_tables(E) nfnetlink(E) xt_conntrack(E) nf_nat(E) nf_conntrack(E) nf_defrag_ipv6(E) nf_defrag_ipv4(E) libcrc32c(E) br_netfilter(E) bridge(E) stp(E) llc(E) overlay(E) lz4(E) lz4_compress(E) cpufreq_powersave(E) cpufreq_userspace(E) cpufreq_conservative(E) binfmt_misc(E) amdgpu(OE) nls_ascii(E) nls_cp437(E) vfat(E) fat(E) chash(OE) gpu_sched(OE) edac_mce_amd(E) kvm(E) irqbypass(E) crct10dif_pclmul(E) crc32_pclmul(E) ghash_clmulni_intel(E) radeon(OE) snd_hda_codec_realtek(E) snd_hda_codec_generic(E) snd_hda_codec_hdmi(E) ttm(OE) wmi_bmof(E) snd_hda_intel(E) drm_kms_helper(OE) aesni_intel(E) snd_hda_codec(E) efi_pstore(E) aes_x86_64(E) snd_hda_core(E) crypto_simd(E) snd_hwdep(E) r8169(E) cryptd(E) drm(OE) glue_helper(E) pcspkr(E) efivars(E) sg(E) k10temp(E) snd_pcm(E) i2c_algo_bit(E) libphy(E)
Dec 13 16:35:07 kaveri kernel: [   52.603509]  fb_sys_fops(E) syscopyarea(E) ccp(E) snd_timer(E) sysfillrect(E) sp5100_tco(E) sysimgblt(E) snd(E) soundcore(E) rng_core(E) i2c_piix4(E) wmi(E) pcc_cpufreq(E) button(E) acpi_cpufreq(E) tcp_bbr(E) sch_fq(E) nct6775(E) hwmon_vid(E) sunrpc(E) efivarfs(E) ip_tables(E) x_tables(E) autofs4(E) ext4(E) crc32c_generic(E) crc16(E) mbcache(E) jbd2(E) fscrypto(E) dm_mod(E) raid10(E) raid1(E) raid0(E) multipath(E) linear(E) md_mod(E) sd_mod(E) evdev(E) hid_generic(E) usbhid(E) hid(E) ahci(E) libahci(E) xhci_pci(E) libata(E) xhci_hcd(E) crc32c_intel(E) scsi_mod(E) usbcore(E) gpio_amdpt(E) gpio_generic(E)
Dec 13 16:35:07 kaveri kernel: [   52.603555] CPU: 13 PID: 2010 Comm: Xorg Tainted: G           OE     4.20.0-rc3+ #118
Dec 13 16:35:07 kaveri kernel: [   52.603559] Hardware name: Micro-Star International Co., Ltd. MS-7A34/B350 TOMAHAWK (MS-7A34), BIOS 1.80 09/13/2017
Dec 13 16:35:07 kaveri kernel: [   52.603628] RIP: 0010:amdgpu_bo_unpin+0x24e/0x340 [amdgpu]
Dec 13 16:35:07 kaveri kernel: [   52.603633] Code: 3c 11 00 0f 85 ff 00 00 00 49 8b bd 28 d4 ff ff 4c 89 e2 48 c7 c6 00 9a eb c2 89 04 24 e8 6a d4 5a ca 8b 04 24 e9 a9 fe ff ff <0f> 0b 49 8d bd 28 d4 ff ff 48 b8 00 00 00 00 00 fc ff df 48 89 fa
Dec 13 16:35:07 kaveri kernel: [   52.603636] RSP: 0018:ffff8883d403f3d8 EFLAGS: 00010246
Dec 13 16:35:07 kaveri kernel: [   52.603641] RAX: 0000000000000000 RBX: 1ffff1107a807e96 RCX: ffff8883c445e650
Dec 13 16:35:07 kaveri kernel: [   52.603645] RDX: 1ffff1107888bd37 RSI: 0000000000000000 RDI: ffff8883c445e9b8
Dec 13 16:35:07 kaveri kernel: [   52.603648] RBP: ffff8883c445e9e8 R08: fffffbfff83643a7 R09: fffffbfff83643a6
Dec 13 16:35:07 kaveri kernel: [   52.603652] R10: fffffbfff83643a6 R11: ffffffffc1b21d33 R12: ffff8883c445e600
Dec 13 16:35:07 kaveri kernel: [   52.603655] R13: ffff8883b1de2bd8 R14: 1ffff1107a807e7e R15: ffff8883b1de2bd8
Dec 13 16:35:07 kaveri kernel: [   52.603659] FS:  00007fc8b8929940(0000) GS:ffff8883ee140000(0000) knlGS:0000000000000000
Dec 13 16:35:07 kaveri kernel: [   52.603663] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
Dec 13 16:35:07 kaveri kernel: [   52.603666] CR2: 00007fc8ba821178 CR3: 00000003b4e0a000 CR4: 00000000003406e0
Dec 13 16:35:07 kaveri kernel: [   52.603669] Call Trace:
Dec 13 16:35:07 kaveri kernel: [   52.603741]  ? amdgpu_bo_unref+0x70/0x70 [amdgpu]
Dec 13 16:35:07 kaveri kernel: [   52.603836]  dm_plane_helper_cleanup_fb+0x184/0x470 [amdgpu]
Dec 13 16:35:07 kaveri kernel: [   52.603924]  ? perf_trace_amdgpu_dc_performance+0x720/0x720 [amdgpu]
Dec 13 16:35:07 kaveri kernel: [   52.603932]  ? _raw_spin_unlock_irqrestore+0x3c/0x50
Dec 13 16:35:07 kaveri kernel: [   52.603940]  ? lockdep_hardirqs_on+0x37c/0x560
Dec 13 16:35:07 kaveri kernel: [   52.603954]  drm_atomic_helper_cleanup_planes+0x175/0x2c0 [drm_kms_helper]
Dec 13 16:35:07 kaveri kernel: [   52.604044]  amdgpu_dm_atomic_commit_tail+0x1b84/0x3310 [amdgpu]
Dec 13 16:35:07 kaveri kernel: [   52.604054]  ? wait_for_completion_io_timeout+0x390/0x390
Dec 13 16:35:07 kaveri kernel: [   52.604060]  ? rwlock_bug.part.2+0x90/0x90
Dec 13 16:35:07 kaveri kernel: [   52.604150]  ? amdgpu_dm_do_flip+0xe50/0xe50 [amdgpu]
Dec 13 16:35:07 kaveri kernel: [   52.604163]  ? drm_atomic_helper_wait_for_dependencies+0x494/0x7e0 [drm_kms_helper]
Dec 13 16:35:07 kaveri kernel: [   52.604178]  commit_tail+0x96/0xe0 [drm_kms_helper]
Dec 13 16:35:07 kaveri kernel: [   52.604190]  drm_atomic_helper_commit+0x177/0x240 [drm_kms_helper]
Dec 13 16:35:07 kaveri kernel: [   52.604203]  drm_atomic_helper_disable_plane+0x109/0x1d0 [drm_kms_helper]
Dec 13 16:35:07 kaveri kernel: [   52.604224]  drm_mode_cursor_universal+0x3e6/0xb30 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.604250]  ? setplane_internal+0x330/0x330 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.604260]  ? ww_mutex_lock_interruptible+0x34/0xa0
Dec 13 16:35:07 kaveri kernel: [   52.604264]  ? _cond_resched+0x15/0x30
Dec 13 16:35:07 kaveri kernel: [   52.604268]  ? ww_mutex_lock_interruptible+0x34/0xa0
Dec 13 16:35:07 kaveri kernel: [   52.604290]  drm_mode_cursor_common+0x4d0/0x8c0 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.604297]  ? mark_held_locks+0x140/0x140
Dec 13 16:35:07 kaveri kernel: [   52.604303]  ? __read_once_size_nocheck.constprop.7+0x10/0x10
Dec 13 16:35:07 kaveri kernel: [   52.604322]  ? drm_mode_cursor_universal+0xb30/0xb30 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.604348]  ? drm_dev_exit+0x5/0x30 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.604367]  ? drm_mode_setplane+0x850/0x850 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.604386]  drm_mode_cursor_ioctl+0x86/0xc0 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.604405]  ? drm_mode_setplane+0x850/0x850 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.604424]  ? drm_is_current_master+0x6a/0x110 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.604444]  drm_ioctl_kernel+0x1c6/0x260 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.604462]  ? drm_setversion+0x800/0x800 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.604485]  drm_ioctl+0x403/0x850 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.604506]  ? drm_mode_setplane+0x850/0x850 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.604524]  ? drm_ioctl_kernel+0x260/0x260 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.604528]  ? find_held_lock+0x33/0x1c0
Dec 13 16:35:07 kaveri kernel: [   52.604535]  ? __pm_runtime_resume+0xb2/0xf0
Dec 13 16:35:07 kaveri kernel: [   52.604543]  ? lock_downgrade+0x5d0/0x5d0
Dec 13 16:35:07 kaveri kernel: [   52.604547]  ? lock_acquire+0x103/0x2c0
Dec 13 16:35:07 kaveri kernel: [   52.604552]  ? __pm_runtime_resume+0x98/0xf0
Dec 13 16:35:07 kaveri kernel: [   52.604557]  ? _raw_spin_unlock_irqrestore+0x3c/0x50
Dec 13 16:35:07 kaveri kernel: [   52.604563]  ? lockdep_hardirqs_on+0x37c/0x560
Dec 13 16:35:07 kaveri kernel: [   52.604633]  amdgpu_drm_ioctl+0xcc/0x1b0 [amdgpu]
Dec 13 16:35:07 kaveri kernel: [   52.604642]  do_vfs_ioctl+0x193/0xfd0
Dec 13 16:35:07 kaveri kernel: [   52.604647]  ? lock_downgrade+0x5d0/0x5d0
Dec 13 16:35:07 kaveri kernel: [   52.604653]  ? ioctl_preallocate+0x1b0/0x1b0
Dec 13 16:35:07 kaveri kernel: [   52.604662]  ? __fget+0x287/0x3e0
Dec 13 16:35:07 kaveri kernel: [   52.604670]  ? __ia32_sys_dup2+0x2e0/0x2e0
Dec 13 16:35:07 kaveri kernel: [   52.604676]  ? blkg_prfill_rwstat_field_recursive+0xf0/0xf0
Dec 13 16:35:07 kaveri kernel: [   52.604681]  ? _raw_spin_unlock_irq+0x29/0x30
Dec 13 16:35:07 kaveri kernel: [   52.604691]  ksys_ioctl+0x60/0x90
Dec 13 16:35:07 kaveri kernel: [   52.604697]  __x64_sys_ioctl+0x6f/0xb0
Dec 13 16:35:07 kaveri kernel: [   52.604701]  ? lockdep_hardirqs_on+0x37c/0x560
Dec 13 16:35:07 kaveri kernel: [   52.604707]  do_syscall_64+0x9c/0x3d0
Dec 13 16:35:07 kaveri kernel: [   52.604713]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
Dec 13 16:35:07 kaveri kernel: [   52.604717] RIP: 0033:0x7fc8b94e07e7
Dec 13 16:35:07 kaveri kernel: [   52.604721] Code: 00 00 90 48 8b 05 a9 a6 0c 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 79 a6 0c 00 f7 d8 64 89 01 48
Dec 13 16:35:07 kaveri kernel: [   52.604725] RSP: 002b:00007ffd15f91888 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
Dec 13 16:35:07 kaveri kernel: [   52.604729] RAX: ffffffffffffffda RBX: 000055b030265b60 RCX: 00007fc8b94e07e7
Dec 13 16:35:07 kaveri kernel: [   52.604732] RDX: 00007ffd15f918c0 RSI: 00000000c01c64a3 RDI: 000000000000000d
Dec 13 16:35:07 kaveri kernel: [   52.604736] RBP: 00007ffd15f918b0 R08: 0000000000000080 R09: 000055b0307f8940
Dec 13 16:35:07 kaveri kernel: [   52.604739] R10: 00007fc8ba86b560 R11: 0000000000000246 R12: 00007ffd15f918c0
Dec 13 16:35:07 kaveri kernel: [   52.604742] R13: 00000000c01c64a3 R14: 000000000000000d R15: 0000000000000000
Dec 13 16:35:07 kaveri kernel: [   52.604755] irq event stamp: 626894
Dec 13 16:35:07 kaveri kernel: [   52.604760] hardirqs last  enabled at (626893): [<ffffffff8d3817ec>] _raw_spin_unlock_irqrestore+0x3c/0x50
Dec 13 16:35:07 kaveri kernel: [   52.604764] hardirqs last disabled at (626894): [<ffffffff8bc03552>] trace_hardirqs_off_thunk+0x1a/0x1c
Dec 13 16:35:07 kaveri kernel: [   52.604769] softirqs last  enabled at (626872): [<ffffffff8d6005d4>] __do_softirq+0x5d4/0x86e
Dec 13 16:35:07 kaveri kernel: [   52.604774] softirqs last disabled at (626861): [<ffffffff8bd3afc2>] irq_exit+0x1a2/0x1d0
Dec 13 16:35:07 kaveri kernel: [   52.604777] ---[ end trace 2ad62cfc2ba4843c ]---
Dec 13 16:35:07 kaveri kernel: [   52.604785] amdgpu 0000:23:00.0: 000000000f2d9ca9 unpin not necessary
Dec 13 16:35:07 kaveri kernel: [   52.788450] ==================================================================
Dec 13 16:35:07 kaveri kernel: [   52.788551] BUG: KASAN: use-after-free in drm_gem_object_release_handle+0x1a8/0x1d0 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.788556] Read of size 8 at addr ffff8883c445e9f0 by task Xorg/2010
Dec 13 16:35:07 kaveri kernel: [   52.788559] 
Dec 13 16:35:07 kaveri kernel: [   52.788565] CPU: 13 PID: 2010 Comm: Xorg Tainted: G        W  OE     4.20.0-rc3+ #118
Dec 13 16:35:07 kaveri kernel: [   52.788569] Hardware name: Micro-Star International Co., Ltd. MS-7A34/B350 TOMAHAWK (MS-7A34), BIOS 1.80 09/13/2017
Dec 13 16:35:07 kaveri kernel: [   52.788573] Call Trace:
Dec 13 16:35:07 kaveri kernel: [   52.788581]  dump_stack+0x7c/0xc0
Dec 13 16:35:07 kaveri kernel: [   52.788588]  print_address_description+0x65/0x22e
Dec 13 16:35:07 kaveri kernel: [   52.788606]  ? drm_gem_object_release_handle+0x1a8/0x1d0 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.788611]  kasan_report.cold.5+0x241/0x306
Dec 13 16:35:07 kaveri kernel: [   52.788630]  ? drm_gem_object_handle_put_unlocked+0x260/0x260 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.788647]  drm_gem_object_release_handle+0x1a8/0x1d0 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.788665]  ? drm_gem_object_handle_put_unlocked+0x260/0x260 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.788670]  idr_for_each+0xef/0x1d0
Dec 13 16:35:07 kaveri kernel: [   52.788676]  ? idr_find+0x50/0x50
Dec 13 16:35:07 kaveri kernel: [   52.788684]  ? mark_held_locks+0xc1/0x140
Dec 13 16:35:07 kaveri kernel: [   52.788691]  ? _raw_spin_unlock_irqrestore+0x3c/0x50
Dec 13 16:35:07 kaveri kernel: [   52.788712]  drm_gem_release+0x1c/0x30 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.788729]  drm_file_free.part.3+0x96b/0xe30 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.788752]  drm_release+0x231/0x3f0 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.788758]  ? fsnotify_first_mark+0x130/0x130
Dec 13 16:35:07 kaveri kernel: [   52.788776]  ? drm_lastclose+0x2c0/0x2c0 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.788782]  ? security_file_free+0x3f/0x70
Dec 13 16:35:07 kaveri kernel: [   52.788791]  __fput+0x235/0x710
Dec 13 16:35:07 kaveri kernel: [   52.788800]  task_work_run+0x10e/0x180
Dec 13 16:35:07 kaveri kernel: [   52.788810]  do_exit+0x952/0x2880
Dec 13 16:35:07 kaveri kernel: [   52.788820]  ? mm_update_next_owner+0x600/0x600
Dec 13 16:35:07 kaveri kernel: [   52.788826]  ? __do_page_fault+0x472/0xaa0
Dec 13 16:35:07 kaveri kernel: [   52.788834]  ? lock_downgrade+0x5d0/0x5d0
Dec 13 16:35:07 kaveri kernel: [   52.788839]  ? handle_mm_fault+0x4db/0x750
Dec 13 16:35:07 kaveri kernel: [   52.788850]  do_group_exit+0xf0/0x2e0
Dec 13 16:35:07 kaveri kernel: [   52.788857]  __x64_sys_exit_group+0x3a/0x50
Dec 13 16:35:07 kaveri kernel: [   52.788862]  do_syscall_64+0x9c/0x3d0
Dec 13 16:35:07 kaveri kernel: [   52.788868]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
Dec 13 16:35:07 kaveri kernel: [   52.788872] RIP: 0033:0x7fc8b94b6d76
Dec 13 16:35:07 kaveri kernel: [   52.788880] Code: Bad RIP value.
Dec 13 16:35:07 kaveri kernel: [   52.788883] RSP: 002b:00007ffd15f91bb8 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7
Dec 13 16:35:07 kaveri kernel: [   52.788888] RAX: ffffffffffffffda RBX: 00007fc8b95a7760 RCX: 00007fc8b94b6d76
Dec 13 16:35:07 kaveri kernel: [   52.788892] RDX: 0000000000000000 RSI: 000000000000003c RDI: 0000000000000000
Dec 13 16:35:07 kaveri kernel: [   52.788895] RBP: 0000000000000000 R08: 00000000000000e7 R09: fffffffffffffd58
Dec 13 16:35:07 kaveri kernel: [   52.788899] R10: 0000000000000007 R11: 0000000000000246 R12: 00007fc8b95a7760
Dec 13 16:35:07 kaveri kernel: [   52.788902] R13: 00000000000004b3 R14: 00007fc8b95b0428 R15: 0000000000000000
Dec 13 16:35:07 kaveri kernel: [   52.788914] 
Dec 13 16:35:07 kaveri kernel: [   52.788918] Allocated by task 2010:
Dec 13 16:35:07 kaveri kernel: [   52.788922]  kasan_kmalloc+0xbf/0xe0
Dec 13 16:35:07 kaveri kernel: [   52.788926]  kmem_cache_alloc_trace+0x12d/0x290
Dec 13 16:35:07 kaveri kernel: [   52.788998]  amdgpu_bo_do_create+0x25b/0x1050 [amdgpu]
Dec 13 16:35:07 kaveri kernel: [   52.789066]  amdgpu_bo_create+0xa3/0xa00 [amdgpu]
Dec 13 16:35:07 kaveri kernel: [   52.789134]  amdgpu_gem_object_create+0x140/0x240 [amdgpu]
Dec 13 16:35:07 kaveri kernel: [   52.789202]  amdgpu_gem_create_ioctl+0x4a8/0x8f0 [amdgpu]
Dec 13 16:35:07 kaveri kernel: [   52.789219]  drm_ioctl_kernel+0x1c6/0x260 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.789236]  drm_ioctl+0x403/0x850 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.789306]  amdgpu_drm_ioctl+0xcc/0x1b0 [amdgpu]
Dec 13 16:35:07 kaveri kernel: [   52.789311]  do_vfs_ioctl+0x193/0xfd0
Dec 13 16:35:07 kaveri kernel: [   52.789315]  ksys_ioctl+0x60/0x90
Dec 13 16:35:07 kaveri kernel: [   52.789319]  __x64_sys_ioctl+0x6f/0xb0
Dec 13 16:35:07 kaveri kernel: [   52.789323]  do_syscall_64+0x9c/0x3d0
Dec 13 16:35:07 kaveri kernel: [   52.789327]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
Dec 13 16:35:07 kaveri kernel: [   52.789330] 
Dec 13 16:35:07 kaveri kernel: [   52.789333] Freed by task 2010:
Dec 13 16:35:07 kaveri kernel: [   52.789337]  __kasan_slab_free+0x125/0x170
Dec 13 16:35:07 kaveri kernel: [   52.789341]  kfree+0xe2/0x290
Dec 13 16:35:07 kaveri kernel: [   52.789349]  ttm_bo_release_list+0x3f3/0x560 [ttm]
Dec 13 16:35:07 kaveri kernel: [   52.789357]  ttm_bo_vm_close+0x34/0x70 [ttm]
Dec 13 16:35:07 kaveri kernel: [   52.789361]  remove_vma+0x92/0x130
Dec 13 16:35:07 kaveri kernel: [   52.789365]  exit_mmap+0x292/0x400
Dec 13 16:35:07 kaveri kernel: [   52.789369]  mmput+0xb2/0x390
Dec 13 16:35:07 kaveri kernel: [   52.789373]  do_exit+0x8c4/0x2880
Dec 13 16:35:07 kaveri kernel: [   52.789377]  do_group_exit+0xf0/0x2e0
Dec 13 16:35:07 kaveri kernel: [   52.789381]  __x64_sys_exit_group+0x3a/0x50
Dec 13 16:35:07 kaveri kernel: [   52.789385]  do_syscall_64+0x9c/0x3d0
Dec 13 16:35:07 kaveri kernel: [   52.789389]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
Dec 13 16:35:07 kaveri kernel: [   52.789392] 
Dec 13 16:35:07 kaveri kernel: [   52.789395] The buggy address belongs to the object at ffff8883c445e600
Dec 13 16:35:07 kaveri kernel: [   52.789395]  which belongs to the cache kmalloc-2k of size 2048
Dec 13 16:35:07 kaveri kernel: [   52.789401] The buggy address is located 1008 bytes inside of
Dec 13 16:35:07 kaveri kernel: [   52.789401]  2048-byte region [ffff8883c445e600, ffff8883c445ee00)
Dec 13 16:35:07 kaveri kernel: [   52.789404] The buggy address belongs to the page:
Dec 13 16:35:07 kaveri kernel: [   52.789409] page:ffffea000f111600 count:1 mapcount:0 mapping:ffff8883ed80e800 index:0xffff8883c445f700 compound_mapcount: 0
Dec 13 16:35:07 kaveri kernel: [   52.789415] flags: 0x17fffc000010200(slab|head)
Dec 13 16:35:07 kaveri kernel: [   52.789420] raw: 017fffc000010200 ffffea000efef808 ffffea000eb08608 ffff8883ed80e800
Dec 13 16:35:07 kaveri kernel: [   52.789425] raw: ffff8883c445f700 00000000000f000e 00000001ffffffff 0000000000000000
Dec 13 16:35:07 kaveri kernel: [   52.789428] page dumped because: kasan: bad access detected
Dec 13 16:35:07 kaveri kernel: [   52.789431] 
Dec 13 16:35:07 kaveri kernel: [   52.789434] Memory state around the buggy address:
Dec 13 16:35:07 kaveri kernel: [   52.789438]  ffff8883c445e880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
Dec 13 16:35:07 kaveri kernel: [   52.789442]  ffff8883c445e900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
Dec 13 16:35:07 kaveri kernel: [   52.789446] >ffff8883c445e980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
Dec 13 16:35:07 kaveri kernel: [   52.789449]                                                              ^
Dec 13 16:35:07 kaveri kernel: [   52.789453]  ffff8883c445ea00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
Dec 13 16:35:07 kaveri kernel: [   52.789457]  ffff8883c445ea80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
Dec 13 16:35:07 kaveri kernel: [   52.789460] ==================================================================
Dec 13 16:35:07 kaveri kernel: [   52.789463] Disabling lock debugging due to kernel taint
Dec 13 16:35:07 kaveri kernel: [   52.789487] ------------[ cut here ]------------
Dec 13 16:35:07 kaveri kernel: [   52.789490] refcount_t: increment on 0; use-after-free.
Dec 13 16:35:07 kaveri kernel: [   52.789504] WARNING: CPU: 13 PID: 2010 at lib/refcount.c:153 refcount_inc_checked+0x26/0x30
Dec 13 16:35:07 kaveri kernel: [   52.789506] Modules linked in: fuse(E) ipt_MASQUERADE(E) nf_conntrack_netlink(E) xfrm_user(E) xfrm_algo(E) nft_counter(E) nft_chain_nat_ipv4(E) nf_nat_ipv4(E) xt_addrtype(E) nft_compat(E) nf_tables(E) nfnetlink(E) xt_conntrack(E) nf_nat(E) nf_conntrack(E) nf_defrag_ipv6(E) nf_defrag_ipv4(E) libcrc32c(E) br_netfilter(E) bridge(E) stp(E) llc(E) overlay(E) lz4(E) lz4_compress(E) cpufreq_powersave(E) cpufreq_userspace(E) cpufreq_conservative(E) binfmt_misc(E) amdgpu(OE) nls_ascii(E) nls_cp437(E) vfat(E) fat(E) chash(OE) gpu_sched(OE) edac_mce_amd(E) kvm(E) irqbypass(E) crct10dif_pclmul(E) crc32_pclmul(E) ghash_clmulni_intel(E) radeon(OE) snd_hda_codec_realtek(E) snd_hda_codec_generic(E) snd_hda_codec_hdmi(E) ttm(OE) wmi_bmof(E) snd_hda_intel(E) drm_kms_helper(OE) aesni_intel(E) snd_hda_codec(E) efi_pstore(E) aes_x86_64(E) snd_hda_core(E) crypto_simd(E) snd_hwdep(E) r8169(E) cryptd(E) drm(OE) glue_helper(E) pcspkr(E) efivars(E) sg(E) k10temp(E) snd_pcm(E) i2c_algo_bit(E) libphy(E)
Dec 13 16:35:07 kaveri kernel: [   52.789543]  fb_sys_fops(E) syscopyarea(E) ccp(E) snd_timer(E) sysfillrect(E) sp5100_tco(E) sysimgblt(E) snd(E) soundcore(E) rng_core(E) i2c_piix4(E) wmi(E) pcc_cpufreq(E) button(E) acpi_cpufreq(E) tcp_bbr(E) sch_fq(E) nct6775(E) hwmon_vid(E) sunrpc(E) efivarfs(E) ip_tables(E) x_tables(E) autofs4(E) ext4(E) crc32c_generic(E) crc16(E) mbcache(E) jbd2(E) fscrypto(E) dm_mod(E) raid10(E) raid1(E) raid0(E) multipath(E) linear(E) md_mod(E) sd_mod(E) evdev(E) hid_generic(E) usbhid(E) hid(E) ahci(E) libahci(E) xhci_pci(E) libata(E) xhci_hcd(E) crc32c_intel(E) scsi_mod(E) usbcore(E) gpio_amdpt(E) gpio_generic(E)
Dec 13 16:35:07 kaveri kernel: [   52.789578] CPU: 13 PID: 2010 Comm: Xorg Tainted: G    B   W  OE     4.20.0-rc3+ #118
Dec 13 16:35:07 kaveri kernel: [   52.789581] Hardware name: Micro-Star International Co., Ltd. MS-7A34/B350 TOMAHAWK (MS-7A34), BIOS 1.80 09/13/2017
Dec 13 16:35:07 kaveri kernel: [   52.789585] RIP: 0010:refcount_inc_checked+0x26/0x30
Dec 13 16:35:07 kaveri kernel: [   52.789588] Code: 00 00 00 00 e8 ab fe ff ff 84 c0 74 01 c3 80 3d 20 40 b2 01 00 75 f6 48 c7 c7 00 c8 9e 8d c6 05 10 40 b2 01 01 e8 d5 49 57 ff <0f> 0b c3 0f 1f 80 00 00 00 00 41 56 41 55 41 54 41 89 fc 55 48 bd
Dec 13 16:35:07 kaveri kernel: [   52.789591] RSP: 0018:ffff8883d403f788 EFLAGS: 00010282
Dec 13 16:35:07 kaveri kernel: [   52.789594] RAX: 0000000000000000 RBX: ffff8883c445e650 RCX: ffffffff8c0634c0
Dec 13 16:35:07 kaveri kernel: [   52.789597] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff8883ee15ea40
Dec 13 16:35:07 kaveri kernel: [   52.789600] RBP: ffff8883b1de2e78 R08: ffffed107dc2bd49 R09: ffffed107dc2bd48
Dec 13 16:35:07 kaveri kernel: [   52.789602] R10: ffffed107dc2bd48 R11: ffff8883ee15ea47 R12: ffff8883b1de2bd8
Dec 13 16:35:07 kaveri kernel: [   52.789605] R13: ffff8883c445e828 R14: ffff8883c445e67c R15: ffff8883b1de2fa0
Dec 13 16:35:07 kaveri kernel: [   52.789608] FS:  00007fc8b8929940(0000) GS:ffff8883ee140000(0000) knlGS:0000000000000000
Dec 13 16:35:07 kaveri kernel: [   52.789611] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
Dec 13 16:35:07 kaveri kernel: [   52.789614] CR2: 00007fc8b94b6d4c CR3: 00000003b7e14000 CR4: 00000000003406e0
Dec 13 16:35:07 kaveri kernel: [   52.789616] Call Trace:
Dec 13 16:35:07 kaveri kernel: [   52.789624]  ttm_bo_add_to_lru+0x242/0x570 [ttm]
Dec 13 16:35:07 kaveri kernel: [   52.789633]  ttm_eu_backoff_reservation+0x123/0x390 [ttm]
Dec 13 16:35:07 kaveri kernel: [   52.789707]  amdgpu_gem_object_close+0x22b/0x410 [amdgpu]
Dec 13 16:35:07 kaveri kernel: [   52.789781]  ? amdgpu_gem_object_open+0x5e0/0x5e0 [amdgpu]
Dec 13 16:35:07 kaveri kernel: [   52.789785]  ? do_raw_spin_unlock+0x54/0x220
Dec 13 16:35:07 kaveri kernel: [   52.789790]  ? console_unlock+0x45f/0xa40
Dec 13 16:35:07 kaveri kernel: [   52.789794]  ? vprintk_emit+0x203/0x3d0
Dec 13 16:35:07 kaveri kernel: [   52.789799]  ? irq_work_claim+0x40/0x70
Dec 13 16:35:07 kaveri kernel: [   52.789802]  ? irq_work_queue+0x9/0x90
Dec 13 16:35:07 kaveri kernel: [   52.789806]  ? wake_up_klogd+0x30/0x40
Dec 13 16:35:07 kaveri kernel: [   52.789809]  ? vprintk_emit+0x211/0x3d0
Dec 13 16:35:07 kaveri kernel: [   52.789812]  ? wake_up_klogd+0x30/0x40
Dec 13 16:35:07 kaveri kernel: [   52.789830]  ? drm_gem_object_release_handle+0x1a8/0x1d0 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.789835]  ? print_shadow_for_address+0xbf/0x11c
Dec 13 16:35:07 kaveri kernel: [   52.789840]  ? kasan_end_report+0x33/0x4e
Dec 13 16:35:07 kaveri kernel: [   52.789842]  ? kasan_report.cold.5+0x75/0x306
Dec 13 16:35:07 kaveri kernel: [   52.789857]  drm_gem_object_release_handle+0x94/0x1d0 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.789870]  ? drm_gem_object_handle_put_unlocked+0x260/0x260 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.789874]  idr_for_each+0xef/0x1d0
Dec 13 16:35:07 kaveri kernel: [   52.789877]  ? idr_find+0x50/0x50
Dec 13 16:35:07 kaveri kernel: [   52.789881]  ? mark_held_locks+0xc1/0x140
Dec 13 16:35:07 kaveri kernel: [   52.789884]  ? _raw_spin_unlock_irqrestore+0x3c/0x50
Dec 13 16:35:07 kaveri kernel: [   52.789899]  drm_gem_release+0x1c/0x30 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.789912]  drm_file_free.part.3+0x96b/0xe30 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.789928]  drm_release+0x231/0x3f0 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.789931]  ? fsnotify_first_mark+0x130/0x130
Dec 13 16:35:07 kaveri kernel: [   52.789945]  ? drm_lastclose+0x2c0/0x2c0 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.789948]  ? security_file_free+0x3f/0x70
Dec 13 16:35:07 kaveri kernel: [   52.789953]  __fput+0x235/0x710
Dec 13 16:35:07 kaveri kernel: [   52.789957]  task_work_run+0x10e/0x180
Dec 13 16:35:07 kaveri kernel: [   52.789962]  do_exit+0x952/0x2880
Dec 13 16:35:07 kaveri kernel: [   52.789967]  ? mm_update_next_owner+0x600/0x600
Dec 13 16:35:07 kaveri kernel: [   52.789970]  ? __do_page_fault+0x472/0xaa0
Dec 13 16:35:07 kaveri kernel: [   52.789974]  ? lock_downgrade+0x5d0/0x5d0
Dec 13 16:35:07 kaveri kernel: [   52.789977]  ? handle_mm_fault+0x4db/0x750
Dec 13 16:35:07 kaveri kernel: [   52.789982]  do_group_exit+0xf0/0x2e0
Dec 13 16:35:07 kaveri kernel: [   52.789986]  __x64_sys_exit_group+0x3a/0x50
Dec 13 16:35:07 kaveri kernel: [   52.789989]  do_syscall_64+0x9c/0x3d0
Dec 13 16:35:07 kaveri kernel: [   52.789993]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
Dec 13 16:35:07 kaveri kernel: [   52.789995] RIP: 0033:0x7fc8b94b6d76
Dec 13 16:35:07 kaveri kernel: [   52.789998] Code: Bad RIP value.
Dec 13 16:35:07 kaveri kernel: [   52.790000] RSP: 002b:00007ffd15f91bb8 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7
Dec 13 16:35:07 kaveri kernel: [   52.790003] RAX: ffffffffffffffda RBX: 00007fc8b95a7760 RCX: 00007fc8b94b6d76
Dec 13 16:35:07 kaveri kernel: [   52.790005] RDX: 0000000000000000 RSI: 000000000000003c RDI: 0000000000000000
Dec 13 16:35:07 kaveri kernel: [   52.790007] RBP: 0000000000000000 R08: 00000000000000e7 R09: fffffffffffffd58
Dec 13 16:35:07 kaveri kernel: [   52.790009] R10: 0000000000000007 R11: 0000000000000246 R12: 00007fc8b95a7760
Dec 13 16:35:07 kaveri kernel: [   52.790011] R13: 00000000000004b3 R14: 00007fc8b95b0428 R15: 0000000000000000
Dec 13 16:35:07 kaveri kernel: [   52.790017] irq event stamp: 653260
Dec 13 16:35:07 kaveri kernel: [   52.790020] hardirqs last  enabled at (653259): [<ffffffff8c292cb7>] quarantine_put+0xb7/0x150
Dec 13 16:35:07 kaveri kernel: [   52.790023] hardirqs last disabled at (653260): [<ffffffff8d3815e2>] _raw_spin_lock_irqsave+0x12/0x40
Dec 13 16:35:07 kaveri kernel: [   52.790026] softirqs last  enabled at (652688): [<ffffffff8d6005d4>] __do_softirq+0x5d4/0x86e
Dec 13 16:35:07 kaveri kernel: [   52.790029] softirqs last disabled at (652681): [<ffffffff8bd3afc2>] irq_exit+0x1a2/0x1d0
Dec 13 16:35:07 kaveri kernel: [   52.790031] ---[ end trace 2ad62cfc2ba4843d ]---
Dec 13 16:35:07 kaveri kernel: [   52.790038] ------------[ cut here ]------------
Dec 13 16:35:07 kaveri kernel: [   52.790040] refcount_t: underflow; use-after-free.
Dec 13 16:35:07 kaveri kernel: [   52.790049] WARNING: CPU: 13 PID: 2010 at lib/refcount.c:187 refcount_sub_and_test_checked+0x147/0x160
Dec 13 16:35:07 kaveri kernel: [   52.790051] Modules linked in: fuse(E) ipt_MASQUERADE(E) nf_conntrack_netlink(E) xfrm_user(E) xfrm_algo(E) nft_counter(E) nft_chain_nat_ipv4(E) nf_nat_ipv4(E) xt_addrtype(E) nft_compat(E) nf_tables(E) nfnetlink(E) xt_conntrack(E) nf_nat(E) nf_conntrack(E) nf_defrag_ipv6(E) nf_defrag_ipv4(E) libcrc32c(E) br_netfilter(E) bridge(E) stp(E) llc(E) overlay(E) lz4(E) lz4_compress(E) cpufreq_powersave(E) cpufreq_userspace(E) cpufreq_conservative(E) binfmt_misc(E) amdgpu(OE) nls_ascii(E) nls_cp437(E) vfat(E) fat(E) chash(OE) gpu_sched(OE) edac_mce_amd(E) kvm(E) irqbypass(E) crct10dif_pclmul(E) crc32_pclmul(E) ghash_clmulni_intel(E) radeon(OE) snd_hda_codec_realtek(E) snd_hda_codec_generic(E) snd_hda_codec_hdmi(E) ttm(OE) wmi_bmof(E) snd_hda_intel(E) drm_kms_helper(OE) aesni_intel(E) snd_hda_codec(E) efi_pstore(E) aes_x86_64(E) snd_hda_core(E) crypto_simd(E) snd_hwdep(E) r8169(E) cryptd(E) drm(OE) glue_helper(E) pcspkr(E) efivars(E) sg(E) k10temp(E) snd_pcm(E) i2c_algo_bit(E) libphy(E)
Dec 13 16:35:07 kaveri kernel: [   52.790077]  fb_sys_fops(E) syscopyarea(E) ccp(E) snd_timer(E) sysfillrect(E) sp5100_tco(E) sysimgblt(E) snd(E) soundcore(E) rng_core(E) i2c_piix4(E) wmi(E) pcc_cpufreq(E) button(E) acpi_cpufreq(E) tcp_bbr(E) sch_fq(E) nct6775(E) hwmon_vid(E) sunrpc(E) efivarfs(E) ip_tables(E) x_tables(E) autofs4(E) ext4(E) crc32c_generic(E) crc16(E) mbcache(E) jbd2(E) fscrypto(E) dm_mod(E) raid10(E) raid1(E) raid0(E) multipath(E) linear(E) md_mod(E) sd_mod(E) evdev(E) hid_generic(E) usbhid(E) hid(E) ahci(E) libahci(E) xhci_pci(E) libata(E) xhci_hcd(E) crc32c_intel(E) scsi_mod(E) usbcore(E) gpio_amdpt(E) gpio_generic(E)
Dec 13 16:35:07 kaveri kernel: [   52.790102] CPU: 13 PID: 2010 Comm: Xorg Tainted: G    B   W  OE     4.20.0-rc3+ #118
Dec 13 16:35:07 kaveri kernel: [   52.790104] Hardware name: Micro-Star International Co., Ltd. MS-7A34/B350 TOMAHAWK (MS-7A34), BIOS 1.80 09/13/2017
Dec 13 16:35:07 kaveri kernel: [   52.790107] RIP: 0010:refcount_sub_and_test_checked+0x147/0x160
Dec 13 16:35:07 kaveri kernel: [   52.790110] Code: c2 44 29 e0 89 c5 85 d2 74 8a 80 3d d2 3e b2 01 00 74 04 31 c0 eb aa 48 c7 c7 60 c8 9e 8d c6 05 be 3e b2 01 01 e8 84 48 57 ff <0f> 0b 31 c0 eb 91 48 89 df e8 8b 0e ae ff e9 34 ff ff ff e8 61 42
Dec 13 16:35:07 kaveri kernel: [   52.790112] RSP: 0018:ffff8883d403f8f0 EFLAGS: 00010282
Dec 13 16:35:07 kaveri kernel: [   52.790115] RAX: 0000000000000000 RBX: ffff8883c445e678 RCX: ffffffff8c0634c0
Dec 13 16:35:07 kaveri kernel: [   52.790117] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff8883ee15ea40
Dec 13 16:35:07 kaveri kernel: [   52.790119] RBP: 00000000ffffffff R08: ffffed107dc2bd49 R09: ffffed107dc2bd48
Dec 13 16:35:07 kaveri kernel: [   52.790121] R10: ffffed107dc2bd48 R11: ffff8883ee15ea47 R12: 0000000000000001
Dec 13 16:35:07 kaveri kernel: [   52.790123] R13: ffff8883d403f918 R14: 1ffff1107a807f1f R15: ffff8883c445e650
Dec 13 16:35:07 kaveri kernel: [   52.790126] FS:  00007fc8b8929940(0000) GS:ffff8883ee140000(0000) knlGS:0000000000000000
Dec 13 16:35:07 kaveri kernel: [   52.790128] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
Dec 13 16:35:07 kaveri kernel: [   52.790130] CR2: 00007fc8b94b6d4c CR3: 00000003b7e14000 CR4: 00000000003406e0
Dec 13 16:35:07 kaveri kernel: [   52.790132] Call Trace:
Dec 13 16:35:07 kaveri kernel: [   52.790146]  ? drm_gem_object_release_handle+0x146/0x1d0 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.790149]  ? refcount_inc_checked+0x30/0x30
Dec 13 16:35:07 kaveri kernel: [   52.790153]  ? __mutex_unlock_slowpath+0xe1/0x680
Dec 13 16:35:07 kaveri kernel: [   52.790159]  ttm_bo_put+0x1f/0xca0 [ttm]
Dec 13 16:35:07 kaveri kernel: [   52.790229]  ? amdgpu_mn_unregister+0x23f/0x310 [amdgpu]
Dec 13 16:35:07 kaveri kernel: [   52.790288]  amdgpu_bo_unref+0x31/0x70 [amdgpu]
Dec 13 16:35:07 kaveri kernel: [   52.790346]  amdgpu_gem_object_free+0x72/0xa0 [amdgpu]
Dec 13 16:35:07 kaveri kernel: [   52.790404]  ? amdgpu_debugfs_gem_info+0x310/0x310 [amdgpu]
Dec 13 16:35:07 kaveri kernel: [   52.790418]  ? drm_gem_object_free+0x16c/0x1b0 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.790432]  drm_gem_object_release_handle+0x12a/0x1d0 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.790446]  ? drm_gem_object_handle_put_unlocked+0x260/0x260 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.790449]  idr_for_each+0xef/0x1d0
Dec 13 16:35:07 kaveri kernel: [   52.790453]  ? idr_find+0x50/0x50
Dec 13 16:35:07 kaveri kernel: [   52.790457]  ? mark_held_locks+0xc1/0x140
Dec 13 16:35:07 kaveri kernel: [   52.790460]  ? _raw_spin_unlock_irqrestore+0x3c/0x50
Dec 13 16:35:07 kaveri kernel: [   52.790475]  drm_gem_release+0x1c/0x30 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.790488]  drm_file_free.part.3+0x96b/0xe30 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.790504]  drm_release+0x231/0x3f0 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.790507]  ? fsnotify_first_mark+0x130/0x130
Dec 13 16:35:07 kaveri kernel: [   52.790520]  ? drm_lastclose+0x2c0/0x2c0 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.790524]  ? security_file_free+0x3f/0x70
Dec 13 16:35:07 kaveri kernel: [   52.790528]  __fput+0x235/0x710
Dec 13 16:35:07 kaveri kernel: [   52.790533]  task_work_run+0x10e/0x180
Dec 13 16:35:07 kaveri kernel: [   52.790537]  do_exit+0x952/0x2880
Dec 13 16:35:07 kaveri kernel: [   52.790542]  ? mm_update_next_owner+0x600/0x600
Dec 13 16:35:07 kaveri kernel: [   52.790545]  ? __do_page_fault+0x472/0xaa0
Dec 13 16:35:07 kaveri kernel: [   52.790550]  ? lock_downgrade+0x5d0/0x5d0
Dec 13 16:35:07 kaveri kernel: [   52.790553]  ? handle_mm_fault+0x4db/0x750
Dec 13 16:35:07 kaveri kernel: [   52.790558]  do_group_exit+0xf0/0x2e0
Dec 13 16:35:07 kaveri kernel: [   52.790562]  __x64_sys_exit_group+0x3a/0x50
Dec 13 16:35:07 kaveri kernel: [   52.790565]  do_syscall_64+0x9c/0x3d0
Dec 13 16:35:07 kaveri kernel: [   52.790568]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
Dec 13 16:35:07 kaveri kernel: [   52.790571] RIP: 0033:0x7fc8b94b6d76
Dec 13 16:35:07 kaveri kernel: [   52.790573] Code: Bad RIP value.
Dec 13 16:35:07 kaveri kernel: [   52.790575] RSP: 002b:00007ffd15f91bb8 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7
Dec 13 16:35:07 kaveri kernel: [   52.790578] RAX: ffffffffffffffda RBX: 00007fc8b95a7760 RCX: 00007fc8b94b6d76
Dec 13 16:35:07 kaveri kernel: [   52.790580] RDX: 0000000000000000 RSI: 000000000000003c RDI: 0000000000000000
Dec 13 16:35:07 kaveri kernel: [   52.790582] RBP: 0000000000000000 R08: 00000000000000e7 R09: fffffffffffffd58
Dec 13 16:35:07 kaveri kernel: [   52.790584] R10: 0000000000000007 R11: 0000000000000246 R12: 00007fc8b95a7760
Dec 13 16:35:07 kaveri kernel: [   52.790586] R13: 00000000000004b3 R14: 00007fc8b95b0428 R15: 0000000000000000
Dec 13 16:35:07 kaveri kernel: [   52.790592] irq event stamp: 653260
Dec 13 16:35:07 kaveri kernel: [   52.790595] hardirqs last  enabled at (653259): [<ffffffff8c292cb7>] quarantine_put+0xb7/0x150
Dec 13 16:35:07 kaveri kernel: [   52.790598] hardirqs last disabled at (653260): [<ffffffff8d3815e2>] _raw_spin_lock_irqsave+0x12/0x40
Dec 13 16:35:07 kaveri kernel: [   52.790601] softirqs last  enabled at (652688): [<ffffffff8d6005d4>] __do_softirq+0x5d4/0x86e
Dec 13 16:35:07 kaveri kernel: [   52.790603] softirqs last disabled at (652681): [<ffffffff8bd3afc2>] irq_exit+0x1a2/0x1d0
Dec 13 16:35:07 kaveri kernel: [   52.790605] ---[ end trace 2ad62cfc2ba4843e ]---
Dec 13 16:35:07 kaveri kernel: [   52.815205] ==================================================================
Dec 13 16:35:07 kaveri kernel: [   52.815217] BUG: KASAN: double-free or invalid-free in rcu_process_callbacks+0x8fc/0xf40
Dec 13 16:35:07 kaveri kernel: [   52.815220] 
Dec 13 16:35:07 kaveri kernel: [   52.815226] CPU: 13 PID: 0 Comm: swapper/13 Tainted: G    B   W  OE     4.20.0-rc3+ #118
Dec 13 16:35:07 kaveri kernel: [   52.815230] Hardware name: Micro-Star International Co., Ltd. MS-7A34/B350 TOMAHAWK (MS-7A34), BIOS 1.80 09/13/2017
Dec 13 16:35:07 kaveri kernel: [   52.815233] Call Trace:
Dec 13 16:35:07 kaveri kernel: [   52.815236]  <IRQ>
Dec 13 16:35:07 kaveri kernel: [   52.815243]  dump_stack+0x7c/0xc0
Dec 13 16:35:07 kaveri kernel: [   52.815249]  print_address_description+0x65/0x22e
Dec 13 16:35:07 kaveri kernel: [   52.815253]  ? rcu_process_callbacks+0x8fc/0xf40
Dec 13 16:35:07 kaveri kernel: [   52.815258]  kasan_report_invalid_free+0x65/0xa0
Dec 13 16:35:07 kaveri kernel: [   52.815263]  ? rcu_process_callbacks+0x8fc/0xf40
Dec 13 16:35:07 kaveri kernel: [   52.815267]  __kasan_slab_free+0x157/0x170
Dec 13 16:35:07 kaveri kernel: [   52.815271]  ? rcu_process_callbacks+0x8fc/0xf40
Dec 13 16:35:07 kaveri kernel: [   52.815275]  kfree+0xe2/0x290
Dec 13 16:35:07 kaveri kernel: [   52.815280]  rcu_process_callbacks+0x8fc/0xf40
Dec 13 16:35:07 kaveri kernel: [   52.815286]  ? rcu_note_context_switch+0x380/0x380
Dec 13 16:35:07 kaveri kernel: [   52.815293]  ? run_rebalance_domains+0x1f8/0x2b0
Dec 13 16:35:07 kaveri kernel: [   52.815299]  __do_softirq+0x20f/0x86e
Dec 13 16:35:07 kaveri kernel: [   52.815307]  irq_exit+0x1a2/0x1d0
Dec 13 16:35:07 kaveri kernel: [   52.815311]  smp_apic_timer_interrupt+0xfe/0x450
Dec 13 16:35:07 kaveri kernel: [   52.815316]  apic_timer_interrupt+0xf/0x20
Dec 13 16:35:07 kaveri kernel: [   52.815319]  </IRQ>
Dec 13 16:35:07 kaveri kernel: [   52.815325] RIP: 0010:cpuidle_enter_state+0x104/0x7c0
Dec 13 16:35:07 kaveri kernel: [   52.815329] Code: 00 31 ff e8 7e bf 03 ff 80 7c 24 0c 00 74 12 9c 58 f6 c4 02 0f 85 39 05 00 00 31 ff e8 b5 dd 16 ff e8 80 02 28 ff fb 45 85 e4 <0f> 88 a8 04 00 00 48 b8 ff ff ff ff f3 01 00 00 48 2b 2c 24 48 39
Dec 13 16:35:07 kaveri kernel: [   52.815333] RSP: 0018:ffff8883eb8ffd80 EFLAGS: 00000202 ORIG_RAX: ffffffffffffff13
Dec 13 16:35:07 kaveri kernel: [   52.815338] RAX: 0000000000000007 RBX: ffff8883e51d3f00 RCX: 000000000000001f
Dec 13 16:35:07 kaveri kernel: [   52.815341] RDX: 0000000000000000 RSI: 000000002aaaac88 RDI: ffffffff8e2f9d74
Dec 13 16:35:07 kaveri kernel: [   52.815345] RBP: 0000000c4c069a24 R08: 0000000000000002 R09: 000000000002a500
Dec 13 16:35:07 kaveri kernel: [   52.815348] R10: 00000033a422e73c R11: ffff8883ee16bc7b R12: 0000000000000002
Dec 13 16:35:07 kaveri kernel: [   52.815351] R13: 0000000000000002 R14: 0000000000000002 R15: 00000000000000c0
Dec 13 16:35:07 kaveri kernel: [   52.815363]  do_idle+0x325/0x3d0
Dec 13 16:35:07 kaveri kernel: [   52.815368]  ? arch_cpu_idle_exit+0x40/0x40
Dec 13 16:35:07 kaveri kernel: [   52.815376]  cpu_startup_entry+0x19/0x20
Dec 13 16:35:07 kaveri kernel: [   52.815381]  start_secondary+0x3ae/0x4c0
Dec 13 16:35:07 kaveri kernel: [   52.815386]  ? set_cpu_sibling_map+0x1730/0x1730
Dec 13 16:35:07 kaveri kernel: [   52.815396]  secondary_startup_64+0xa4/0xb0
Dec 13 16:35:07 kaveri kernel: [   52.815403] 
Dec 13 16:35:07 kaveri kernel: [   52.815406] Allocated by task 2010:
Dec 13 16:35:07 kaveri kernel: [   52.815411]  kasan_kmalloc+0xbf/0xe0
Dec 13 16:35:07 kaveri kernel: [   52.815415]  kmem_cache_alloc_trace+0x12d/0x290
Dec 13 16:35:07 kaveri kernel: [   52.815420]  reservation_object_reserve_shared+0xdd/0x740
Dec 13 16:35:07 kaveri kernel: [   52.815429]  ttm_bo_mem_space+0x8b/0xf20 [ttm]
Dec 13 16:35:07 kaveri kernel: [   52.815437]  ttm_bo_validate+0x2df/0x4a0 [ttm]
Dec 13 16:35:07 kaveri kernel: [   52.815521]  amdgpu_bo_pin_restricted+0x39f/0x840 [amdgpu]
Dec 13 16:35:07 kaveri kernel: [   52.815627]  dm_plane_helper_prepare_fb+0x1d9/0xcf0 [amdgpu]
Dec 13 16:35:07 kaveri kernel: [   52.815640]  drm_atomic_helper_prepare_planes+0x12e/0x500 [drm_kms_helper]
Dec 13 16:35:07 kaveri kernel: [   52.815652]  drm_atomic_helper_commit+0x4a/0x240 [drm_kms_helper]
Dec 13 16:35:07 kaveri kernel: [   52.815663]  drm_atomic_helper_update_plane+0x2a0/0x350 [drm_kms_helper]
Dec 13 16:35:07 kaveri kernel: [   52.815685]  drm_mode_cursor_universal+0x3e6/0xb30 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.815706]  drm_mode_cursor_common+0x4d0/0x8c0 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.815725]  drm_ioctl_kernel+0x1c6/0x260 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.815744]  drm_ioctl+0x403/0x850 [drm]
Dec 13 16:35:07 kaveri kernel: [   52.815825]  amdgpu_drm_ioctl+0xcc/0x1b0 [amdgpu]
Dec 13 16:35:07 kaveri kernel: [   52.815829]  do_vfs_ioctl+0x193/0xfd0
Dec 13 16:35:07 kaveri kernel: [   52.815833]  ksys_ioctl+0x60/0x90
Dec 13 16:35:07 kaveri kernel: [   52.815836]  __x64_sys_ioctl+0x6f/0xb0
Dec 13 16:35:07 kaveri kernel: [   52.815840]  do_syscall_64+0x9c/0x3d0
Dec 13 16:35:07 kaveri kernel: [   52.815844]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
Dec 13 16:35:07 kaveri kernel: [   52.815846] 
Dec 13 16:35:07 kaveri kernel: [   52.815849] Freed by task 2010:
Dec 13 16:35:07 kaveri kernel: [   52.815853]  __kasan_slab_free+0x125/0x170
Dec 13 16:35:07 kaveri kernel: [   52.815856]  kfree+0xe2/0x290
Dec 13 16:35:07 kaveri kernel: [   52.815864]  ttm_bo_release_list+0x3b0/0x560 [ttm]
Dec 13 16:35:07 kaveri kernel: [   52.815872]  ttm_bo_vm_close+0x34/0x70 [ttm]
Dec 13 16:35:07 kaveri kernel: [   52.815876]  remove_vma+0x92/0x130
Dec 13 16:35:07 kaveri kernel: [   52.815879]  exit_mmap+0x292/0x400
Dec 13 16:35:07 kaveri kernel: [   52.815883]  mmput+0xb2/0x390
Dec 13 16:35:07 kaveri kernel: [   52.815887]  do_exit+0x8c4/0x2880
Dec 13 16:35:07 kaveri kernel: [   52.815891]  do_group_exit+0xf0/0x2e0
Dec 13 16:35:07 kaveri kernel: [   52.815895]  __x64_sys_exit_group+0x3a/0x50
Dec 13 16:35:07 kaveri kernel: [   52.815898]  do_syscall_64+0x9c/0x3d0
Dec 13 16:35:07 kaveri kernel: [   52.815902]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
Dec 13 16:35:07 kaveri kernel: [   52.815904] 
Dec 13 16:35:07 kaveri kernel: [   52.815908] The buggy address belongs to the object at ffff8883d620e060
Dec 13 16:35:07 kaveri kernel: [   52.815908]  which belongs to the cache kmalloc-64 of size 64
Dec 13 16:35:07 kaveri kernel: [   52.815912] The buggy address is located 0 bytes inside of
Dec 13 16:35:07 kaveri kernel: [   52.815912]  64-byte region [ffff8883d620e060, ffff8883d620e0a0)
Dec 13 16:35:07 kaveri kernel: [   52.815916] The buggy address belongs to the page:
Dec 13 16:35:07 kaveri kernel: [   52.815920] page:ffffea000f588380 count:1 mapcount:0 mapping:ffff8883ed80f600 index:0x0
Dec 13 16:35:07 kaveri kernel: [   52.815924] flags: 0x17fffc000000200(slab)
Dec 13 16:35:07 kaveri kernel: [   52.815930] raw: 017fffc000000200 ffffea000f63ad80 0000001000000010 ffff8883ed80f600
Dec 13 16:35:07 kaveri kernel: [   52.815934] raw: 0000000000000000 00000000002a002a 00000001ffffffff 0000000000000000
Dec 13 16:35:07 kaveri kernel: [   52.815937] page dumped because: kasan: bad access detected
Dec 13 16:35:07 kaveri kernel: [   52.815939] 
Dec 13 16:35:07 kaveri kernel: [   52.815941] Memory state around the buggy address:
Dec 13 16:35:07 kaveri kernel: [   52.815945]  ffff8883d620df00: fb fb fb fb fc fc fb fb fb fb fc fc 00 00 00 00
Dec 13 16:35:07 kaveri kernel: [   52.815949]  ffff8883d620df80: fc fc fb fb fb fb fc fc fb fb fb fb fc fc fc fc
Dec 13 16:35:07 kaveri kernel: [   52.815952] >ffff8883d620e000: fb fb fb fb fb fb fb fb fc fc fc fc fb fb fb fb
Dec 13 16:35:07 kaveri kernel: [   52.815955]                                                        ^
Dec 13 16:35:07 kaveri kernel: [   52.815958]  ffff8883d620e080: fb fb fb fb fc fc fc fc fb fb fb fb fb fb fb fb
Dec 13 16:35:07 kaveri kernel: [   52.815962]  ffff8883d620e100: fc fc fc fc 00 00 00 00 00 00 00 fc fc fc fc fc
Dec 13 16:35:07 kaveri kernel: [   52.815965] ==================================================================

[-- Attachment #3: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] drm/amd/display: Add fast path for cursor plane updates
       [not found]     ` <d9cde6c1-90e1-da3e-7edd-eb3570da666c-otUistvHUpPR7s880joybQ@public.gmane.org>
@ 2018-12-13 16:01       ` Kazlauskas, Nicholas
       [not found]         ` <4d026379-d727-4e71-bc45-c16d0ea39035-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Kazlauskas, Nicholas @ 2018-12-13 16:01 UTC (permalink / raw)
  To: Michel Dänzer
  Cc: Li, Sun peng (Leo),
	Wentland, Harry, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 12/13/18 10:48 AM, Michel Dänzer wrote:
> On 2018-12-05 8:59 p.m., Nicholas Kazlauskas wrote:
>> [Why]
>> Legacy cursor plane updates from drm helpers go through the full
>> atomic codepath. A high volume of cursor updates through this slow
>> code path can cause subsequent page-flips to skip vblank intervals
>> since each individual update is slow.
>>
>> This problem is particularly noticeable for the compton compositor.
>>
>> [How]
>> A fast path for cursor plane updates is added by using DRM asynchronous
>> commit support provided by async_check and async_update. These don't do
>> a full state/flip_done dependency stall and they don't block other
>> commit work.
>>
>> However, DC still expects itself to be single-threaded for anything
>> that can issue register writes. Screen corruption or hangs can occur
>> if write sequences overlap. Every call that potentially perform
>> register writes needs to be guarded for asynchronous updates to work.
>> The dc_lock mutex was added for this.
>>
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175
>>
>> Cc: Leo Li <sunpeng.li@amd.com>
>> Cc: Harry Wentland <harry.wentland@amd.com>
>> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
> 
> Looks like this change introduced (or at least exposed) a reference
> counting bug resulting in use-after-free when Xorg shuts down[0]. See
> the attached dmesg excerpt (note that I wrapped the !bo->pin_count check
> in amdgpu_bo_unpin in WARN_ON_ONCE).
> 
> 
> [0] Only with
> https://gitlab.freedesktop.org/xorg/driver/xf86-video-amdgpu/commit/0d60233d26ec70d4e1faa343b438e33829c6d5e4
> , i.e. alternating between two BOs for the HW cursor, instead of always
> using the same one.
> 
> 

Thanks for the heads up, I don't think I had that patch in my 
xf86-video-amdgpu when testing the desktop stack.

The async atomic helper does the:

drm_atomic_helper_prepare_planes
drm_atomic_helper_async_commit
drm_atomic_helper_cleanup_planes

...sequence correctly from what I can tell, so maybe it's something with
dm_plane_helper_prepare_fb or dm_plane_helper_cleanup_fb itself.

One case where unref could be called (not following a ref) is during 
drm_atomic_helper_prepare_planes - if prepare_fb fails then cleanup_fb 
gets called regardless, and we only ref the fb if prepare_fb is in the 
success path.

Nicholas Kazlauskas
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] drm/amd/display: Add fast path for cursor plane updates
       [not found]         ` <4d026379-d727-4e71-bc45-c16d0ea39035-5C7GfCeVMHo@public.gmane.org>
@ 2018-12-13 19:21           ` Kazlauskas, Nicholas
       [not found]             ` <9888914f-2d7a-62d5-b696-f56226f6dcb5-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Kazlauskas, Nicholas @ 2018-12-13 19:21 UTC (permalink / raw)
  To: Michel Dänzer
  Cc: Li, Sun peng (Leo),
	Wentland, Harry, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 12/13/18 11:01 AM, Kazlauskas, Nicholas wrote:
> On 12/13/18 10:48 AM, Michel Dänzer wrote:
>> On 2018-12-05 8:59 p.m., Nicholas Kazlauskas wrote:
>>> [Why]
>>> Legacy cursor plane updates from drm helpers go through the full
>>> atomic codepath. A high volume of cursor updates through this slow
>>> code path can cause subsequent page-flips to skip vblank intervals
>>> since each individual update is slow.
>>>
>>> This problem is particularly noticeable for the compton compositor.
>>>
>>> [How]
>>> A fast path for cursor plane updates is added by using DRM asynchronous
>>> commit support provided by async_check and async_update. These don't do
>>> a full state/flip_done dependency stall and they don't block other
>>> commit work.
>>>
>>> However, DC still expects itself to be single-threaded for anything
>>> that can issue register writes. Screen corruption or hangs can occur
>>> if write sequences overlap. Every call that potentially perform
>>> register writes needs to be guarded for asynchronous updates to work.
>>> The dc_lock mutex was added for this.
>>>
>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175
>>>
>>> Cc: Leo Li <sunpeng.li@amd.com>
>>> Cc: Harry Wentland <harry.wentland@amd.com>
>>> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
>>
>> Looks like this change introduced (or at least exposed) a reference
>> counting bug resulting in use-after-free when Xorg shuts down[0]. See
>> the attached dmesg excerpt (note that I wrapped the !bo->pin_count check
>> in amdgpu_bo_unpin in WARN_ON_ONCE).
>>
>>
>> [0] Only with
>> https://gitlab.freedesktop.org/xorg/driver/xf86-video-amdgpu/commit/0d60233d26ec70d4e1faa343b438e33829c6d5e4
>> , i.e. alternating between two BOs for the HW cursor, instead of always
>> using the same one.
>>
>>
> 
> Thanks for the heads up, I don't think I had that patch in my
> xf86-video-amdgpu when testing the desktop stack.
> 
> The async atomic helper does the:
> 
> drm_atomic_helper_prepare_planes
> drm_atomic_helper_async_commit
> drm_atomic_helper_cleanup_planes
> 
> ...sequence correctly from what I can tell, so maybe it's something with
> dm_plane_helper_prepare_fb or dm_plane_helper_cleanup_fb itself.
> 
> One case where unref could be called (not following a ref) is during
> drm_atomic_helper_prepare_planes - if prepare_fb fails then cleanup_fb
> gets called regardless, and we only ref the fb if prepare_fb is in the
> success path.
> 
> Nicholas Kazlauskas
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> 

The prepare_fb/cleanup_fb calls are actually fine since cleanup_fb only 
gets called on planes that had prepare_fb succeed in all cases as far as 
I can tell.

I think the bug here might be forgetting to set the plane->state to the 
new_state. The cleanup fb callback decides whether to call it on the old 
plane state or new plane state depending on if the commit was aborted or 
not. I think every fast plane update might be treated as aborted in this 
case.

Nicholas Kazlauskas
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] drm/amd/display: Add fast path for cursor plane updates
       [not found]             ` <9888914f-2d7a-62d5-b696-f56226f6dcb5-5C7GfCeVMHo@public.gmane.org>
@ 2018-12-13 20:59               ` Kazlauskas, Nicholas
       [not found]                 ` <9431c7f6-5935-30bb-4c23-71e5cc1bb956-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Kazlauskas, Nicholas @ 2018-12-13 20:59 UTC (permalink / raw)
  To: Michel Dänzer
  Cc: Li, Sun peng (Leo),
	Wentland, Harry, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 12/13/18 2:21 PM, Kazlauskas, Nicholas wrote:
> On 12/13/18 11:01 AM, Kazlauskas, Nicholas wrote:
>> On 12/13/18 10:48 AM, Michel Dänzer wrote:
>>> On 2018-12-05 8:59 p.m., Nicholas Kazlauskas wrote:
>>>> [Why]
>>>> Legacy cursor plane updates from drm helpers go through the full
>>>> atomic codepath. A high volume of cursor updates through this slow
>>>> code path can cause subsequent page-flips to skip vblank intervals
>>>> since each individual update is slow.
>>>>
>>>> This problem is particularly noticeable for the compton compositor.
>>>>
>>>> [How]
>>>> A fast path for cursor plane updates is added by using DRM asynchronous
>>>> commit support provided by async_check and async_update. These don't do
>>>> a full state/flip_done dependency stall and they don't block other
>>>> commit work.
>>>>
>>>> However, DC still expects itself to be single-threaded for anything
>>>> that can issue register writes. Screen corruption or hangs can occur
>>>> if write sequences overlap. Every call that potentially perform
>>>> register writes needs to be guarded for asynchronous updates to work.
>>>> The dc_lock mutex was added for this.
>>>>
>>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175
>>>>
>>>> Cc: Leo Li <sunpeng.li@amd.com>
>>>> Cc: Harry Wentland <harry.wentland@amd.com>
>>>> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
>>>
>>> Looks like this change introduced (or at least exposed) a reference
>>> counting bug resulting in use-after-free when Xorg shuts down[0]. See
>>> the attached dmesg excerpt (note that I wrapped the !bo->pin_count check
>>> in amdgpu_bo_unpin in WARN_ON_ONCE).
>>>
>>>
>>> [0] Only with
>>> https://gitlab.freedesktop.org/xorg/driver/xf86-video-amdgpu/commit/0d60233d26ec70d4e1faa343b438e33829c6d5e4
>>> , i.e. alternating between two BOs for the HW cursor, instead of always
>>> using the same one.
>>>
>>>
>>
>> Thanks for the heads up, I don't think I had that patch in my
>> xf86-video-amdgpu when testing the desktop stack.
>>
>> The async atomic helper does the:
>>
>> drm_atomic_helper_prepare_planes
>> drm_atomic_helper_async_commit
>> drm_atomic_helper_cleanup_planes
>>
>> ...sequence correctly from what I can tell, so maybe it's something with
>> dm_plane_helper_prepare_fb or dm_plane_helper_cleanup_fb itself.
>>
>> One case where unref could be called (not following a ref) is during
>> drm_atomic_helper_prepare_planes - if prepare_fb fails then cleanup_fb
>> gets called regardless, and we only ref the fb if prepare_fb is in the
>> success path.
>>
>> Nicholas Kazlauskas
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
> 
> The prepare_fb/cleanup_fb calls are actually fine since cleanup_fb only
> gets called on planes that had prepare_fb succeed in all cases as far as
> I can tell.
> 
> I think the bug here might be forgetting to set the plane->state to the
> new_state. The cleanup fb callback decides whether to call it on the old
> plane state or new plane state depending on if the commit was aborted or
> not. I think every fast plane update might be treated as aborted in this
> case.
> 
> Nicholas Kazlauskas
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> 

This is a bug with DRM, actually.

Typically for a regular atomic commit the prepare_fb callback is called 
for the new_plane_state and cleanup_fb is called for the old_plane_state 
at the end of commit tail.

However, for asynchronous commits this isn't the same - prepare_fb is 
called for new_plane_state and cleanup_fb is then immediately called 
after also for the new_plane_state.

Looking at your stack trace I can see that this is exactly what causes 
the use after free,

The CRTC has changed so it's stuck in the slow path (commit_tail is in 
the trace). However, the plane->state->fb has already been unpinned and 
unref. But the plane->state->fb is *not* NULL from the previous fast 
update, so when it gets to cleanup planes it tries to free the 
old_plane_state it unpins and unrefs the bo a second time.

Then a new fast cursor update comes along (and the fb hasn't changed) so 
it tries to prepare_fb on the same freed bo.

Nicholas Kazlauskas
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] drm/amd/display: Add fast path for cursor plane updates
       [not found]                 ` <9431c7f6-5935-30bb-4c23-71e5cc1bb956-5C7GfCeVMHo@public.gmane.org>
@ 2018-12-14  8:46                   ` Michel Dänzer
       [not found]                     ` <c33a43d5-2447-77a4-c043-e12d38ebc552-otUistvHUpPR7s880joybQ@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Michel Dänzer @ 2018-12-14  8:46 UTC (permalink / raw)
  To: Kazlauskas, Nicholas
  Cc: Li, Sun peng (Leo),
	Wentland, Harry, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 2018-12-13 9:59 p.m., Kazlauskas, Nicholas wrote:
> On 12/13/18 2:21 PM, Kazlauskas, Nicholas wrote:
>> On 12/13/18 11:01 AM, Kazlauskas, Nicholas wrote:
>>> On 12/13/18 10:48 AM, Michel Dänzer wrote:
>>>> On 2018-12-05 8:59 p.m., Nicholas Kazlauskas wrote:
>>>>> [Why]
>>>>> Legacy cursor plane updates from drm helpers go through the full
>>>>> atomic codepath. A high volume of cursor updates through this slow
>>>>> code path can cause subsequent page-flips to skip vblank intervals
>>>>> since each individual update is slow.
>>>>>
>>>>> This problem is particularly noticeable for the compton compositor.
>>>>>
>>>>> [How]
>>>>> A fast path for cursor plane updates is added by using DRM asynchronous
>>>>> commit support provided by async_check and async_update. These don't do
>>>>> a full state/flip_done dependency stall and they don't block other
>>>>> commit work.
>>>>>
>>>>> However, DC still expects itself to be single-threaded for anything
>>>>> that can issue register writes. Screen corruption or hangs can occur
>>>>> if write sequences overlap. Every call that potentially perform
>>>>> register writes needs to be guarded for asynchronous updates to work.
>>>>> The dc_lock mutex was added for this.
>>>>>
>>>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175
>>>>>
>>>>> Cc: Leo Li <sunpeng.li@amd.com>
>>>>> Cc: Harry Wentland <harry.wentland@amd.com>
>>>>> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
>>>>
>>>> Looks like this change introduced (or at least exposed) a reference
>>>> counting bug resulting in use-after-free when Xorg shuts down[0]. See
>>>> the attached dmesg excerpt (note that I wrapped the !bo->pin_count check
>>>> in amdgpu_bo_unpin in WARN_ON_ONCE).
>>>>
>>>>
>>>> [0] Only with
>>>> https://gitlab.freedesktop.org/xorg/driver/xf86-video-amdgpu/commit/0d60233d26ec70d4e1faa343b438e33829c6d5e4
>>>> , i.e. alternating between two BOs for the HW cursor, instead of always
>>>> using the same one.
>>>>
>>>>
>>>
>>> Thanks for the heads up, I don't think I had that patch in my
>>> xf86-video-amdgpu when testing the desktop stack.
>>>
>>> The async atomic helper does the:
>>>
>>> drm_atomic_helper_prepare_planes
>>> drm_atomic_helper_async_commit
>>> drm_atomic_helper_cleanup_planes
>>>
>>> ...sequence correctly from what I can tell, so maybe it's something with
>>> dm_plane_helper_prepare_fb or dm_plane_helper_cleanup_fb itself.
>>>
>>> One case where unref could be called (not following a ref) is during
>>> drm_atomic_helper_prepare_planes - if prepare_fb fails then cleanup_fb
>>> gets called regardless, and we only ref the fb if prepare_fb is in the
>>> success path.
>>
>> The prepare_fb/cleanup_fb calls are actually fine since cleanup_fb only
>> gets called on planes that had prepare_fb succeed in all cases as far as
>> I can tell.
>>
>> I think the bug here might be forgetting to set the plane->state to the
>> new_state. The cleanup fb callback decides whether to call it on the old
>> plane state or new plane state depending on if the commit was aborted or
>> not. I think every fast plane update might be treated as aborted in this
>> case.
> 
> This is a bug with DRM, actually.
> 
> Typically for a regular atomic commit the prepare_fb callback is called 
> for the new_plane_state and cleanup_fb is called for the old_plane_state 
> at the end of commit tail.
> 
> However, for asynchronous commits this isn't the same - prepare_fb is 
> called for new_plane_state and cleanup_fb is then immediately called 
> after also for the new_plane_state.
> 
> Looking at your stack trace I can see that this is exactly what causes 
> the use after free,
> 
> The CRTC has changed so it's stuck in the slow path (commit_tail is in 
> the trace). However, the plane->state->fb has already been unpinned and 
> unref. But the plane->state->fb is *not* NULL from the previous fast 
> update, so when it gets to cleanup planes it tries to free the 
> old_plane_state it unpins and unrefs the bo a second time.
> 
> Then a new fast cursor update comes along (and the fb hasn't changed) so 
> it tries to prepare_fb on the same freed bo.

Do you have an idea for a fix? If not, I'm afraid we need to revert this
change again for now, as the consequences can be severe (in one case,
ext4 code started complaining, I couldn't reboot cleanly and had to fsck
afterwards).


-- 
Earthling Michel Dänzer               |               http://www.amd.com
Libre software enthusiast             |             Mesa and X developer
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] drm/amd/display: Add fast path for cursor plane updates
       [not found]                     ` <c33a43d5-2447-77a4-c043-e12d38ebc552-otUistvHUpPR7s880joybQ@public.gmane.org>
@ 2018-12-14 14:01                       ` Kazlauskas, Nicholas
  0 siblings, 0 replies; 16+ messages in thread
From: Kazlauskas, Nicholas @ 2018-12-14 14:01 UTC (permalink / raw)
  To: Michel Dänzer
  Cc: Li, Sun peng (Leo),
	Wentland, Harry, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 12/14/18 3:46 AM, Michel Dänzer wrote:
> On 2018-12-13 9:59 p.m., Kazlauskas, Nicholas wrote:
>> On 12/13/18 2:21 PM, Kazlauskas, Nicholas wrote:
>>> On 12/13/18 11:01 AM, Kazlauskas, Nicholas wrote:
>>>> On 12/13/18 10:48 AM, Michel Dänzer wrote:
>>>>> On 2018-12-05 8:59 p.m., Nicholas Kazlauskas wrote:
>>>>>> [Why]
>>>>>> Legacy cursor plane updates from drm helpers go through the full
>>>>>> atomic codepath. A high volume of cursor updates through this slow
>>>>>> code path can cause subsequent page-flips to skip vblank intervals
>>>>>> since each individual update is slow.
>>>>>>
>>>>>> This problem is particularly noticeable for the compton compositor.
>>>>>>
>>>>>> [How]
>>>>>> A fast path for cursor plane updates is added by using DRM asynchronous
>>>>>> commit support provided by async_check and async_update. These don't do
>>>>>> a full state/flip_done dependency stall and they don't block other
>>>>>> commit work.
>>>>>>
>>>>>> However, DC still expects itself to be single-threaded for anything
>>>>>> that can issue register writes. Screen corruption or hangs can occur
>>>>>> if write sequences overlap. Every call that potentially perform
>>>>>> register writes needs to be guarded for asynchronous updates to work.
>>>>>> The dc_lock mutex was added for this.
>>>>>>
>>>>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175
>>>>>>
>>>>>> Cc: Leo Li <sunpeng.li@amd.com>
>>>>>> Cc: Harry Wentland <harry.wentland@amd.com>
>>>>>> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
>>>>>
>>>>> Looks like this change introduced (or at least exposed) a reference
>>>>> counting bug resulting in use-after-free when Xorg shuts down[0]. See
>>>>> the attached dmesg excerpt (note that I wrapped the !bo->pin_count check
>>>>> in amdgpu_bo_unpin in WARN_ON_ONCE).
>>>>>
>>>>>
>>>>> [0] Only with
>>>>> https://gitlab.freedesktop.org/xorg/driver/xf86-video-amdgpu/commit/0d60233d26ec70d4e1faa343b438e33829c6d5e4
>>>>> , i.e. alternating between two BOs for the HW cursor, instead of always
>>>>> using the same one.
>>>>>
>>>>>
>>>>
>>>> Thanks for the heads up, I don't think I had that patch in my
>>>> xf86-video-amdgpu when testing the desktop stack.
>>>>
>>>> The async atomic helper does the:
>>>>
>>>> drm_atomic_helper_prepare_planes
>>>> drm_atomic_helper_async_commit
>>>> drm_atomic_helper_cleanup_planes
>>>>
>>>> ...sequence correctly from what I can tell, so maybe it's something with
>>>> dm_plane_helper_prepare_fb or dm_plane_helper_cleanup_fb itself.
>>>>
>>>> One case where unref could be called (not following a ref) is during
>>>> drm_atomic_helper_prepare_planes - if prepare_fb fails then cleanup_fb
>>>> gets called regardless, and we only ref the fb if prepare_fb is in the
>>>> success path.
>>>
>>> The prepare_fb/cleanup_fb calls are actually fine since cleanup_fb only
>>> gets called on planes that had prepare_fb succeed in all cases as far as
>>> I can tell.
>>>
>>> I think the bug here might be forgetting to set the plane->state to the
>>> new_state. The cleanup fb callback decides whether to call it on the old
>>> plane state or new plane state depending on if the commit was aborted or
>>> not. I think every fast plane update might be treated as aborted in this
>>> case.
>>
>> This is a bug with DRM, actually.
>>
>> Typically for a regular atomic commit the prepare_fb callback is called
>> for the new_plane_state and cleanup_fb is called for the old_plane_state
>> at the end of commit tail.
>>
>> However, for asynchronous commits this isn't the same - prepare_fb is
>> called for new_plane_state and cleanup_fb is then immediately called
>> after also for the new_plane_state.
>>
>> Looking at your stack trace I can see that this is exactly what causes
>> the use after free,
>>
>> The CRTC has changed so it's stuck in the slow path (commit_tail is in
>> the trace). However, the plane->state->fb has already been unpinned and
>> unref. But the plane->state->fb is *not* NULL from the previous fast
>> update, so when it gets to cleanup planes it tries to free the
>> old_plane_state it unpins and unrefs the bo a second time.
>>
>> Then a new fast cursor update comes along (and the fb hasn't changed) so
>> it tries to prepare_fb on the same freed bo.
> 
> Do you have an idea for a fix? If not, I'm afraid we need to revert this
> change again for now, as the consequences can be severe (in one case,
> ext4 code started complaining, I couldn't reboot cleanly and had to fsck
> afterwards).
> 
> 

Yeah, I have a workaround that I will post for this.

I'm not sure if this is intended behavior or not for DRM - it doesn't 
feel correct to me, but I can understand why it'd make sense in some 
cases. It really depends on what the cleanup_fb is intending to do. It 
certainly doesn't work with the pin/unpin model though.

Nicholas Kazlauskas
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2018-12-14 14:01 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-12-05 19:59 [PATCH] drm/amd/display: Add fast path for cursor plane updates Nicholas Kazlauskas
     [not found] ` <20181205195907.9501-1-nicholas.kazlauskas-5C7GfCeVMHo@public.gmane.org>
2018-12-05 20:26   ` Grodzovsky, Andrey
     [not found]     ` <8b305d43-56e1-5505-95d3-35e92f3f8494-5C7GfCeVMHo@public.gmane.org>
2018-12-05 20:42       ` Nicholas Kazlauskas
     [not found]         ` <6352276b-5d47-94ea-2dd1-0f955ceed408-5C7GfCeVMHo@public.gmane.org>
2018-12-05 20:44           ` Grodzovsky, Andrey
     [not found]             ` <73996dfc-1cd6-e707-9e2b-48a5c89d6cac-5C7GfCeVMHo@public.gmane.org>
2018-12-05 21:01               ` Nicholas Kazlauskas
     [not found]                 ` <5c4c64a1-cc84-e9c2-6275-ca8ac76237b1-5C7GfCeVMHo@public.gmane.org>
2018-12-06 13:42                   ` Kazlauskas, Nicholas
     [not found]                     ` <a7bad892-8215-16a8-65a6-328533395d03-5C7GfCeVMHo@public.gmane.org>
2018-12-06 15:36                       ` Grodzovsky, Andrey
     [not found]                         ` <13f4b0bc-fcda-be57-2416-aa3ba5ff065c-5C7GfCeVMHo@public.gmane.org>
2018-12-06 15:59                           ` Nicholas Kazlauskas
     [not found]                             ` <78e6ff7a-c7f4-324b-7fb2-9df282cb2c7e-5C7GfCeVMHo@public.gmane.org>
2018-12-06 16:12                               ` Grodzovsky, Andrey
2018-12-11 22:30   ` Li, Sun peng (Leo)
2018-12-13 15:48   ` Michel Dänzer
     [not found]     ` <d9cde6c1-90e1-da3e-7edd-eb3570da666c-otUistvHUpPR7s880joybQ@public.gmane.org>
2018-12-13 16:01       ` Kazlauskas, Nicholas
     [not found]         ` <4d026379-d727-4e71-bc45-c16d0ea39035-5C7GfCeVMHo@public.gmane.org>
2018-12-13 19:21           ` Kazlauskas, Nicholas
     [not found]             ` <9888914f-2d7a-62d5-b696-f56226f6dcb5-5C7GfCeVMHo@public.gmane.org>
2018-12-13 20:59               ` Kazlauskas, Nicholas
     [not found]                 ` <9431c7f6-5935-30bb-4c23-71e5cc1bb956-5C7GfCeVMHo@public.gmane.org>
2018-12-14  8:46                   ` Michel Dänzer
     [not found]                     ` <c33a43d5-2447-77a4-c043-e12d38ebc552-otUistvHUpPR7s880joybQ@public.gmane.org>
2018-12-14 14:01                       ` Kazlauskas, Nicholas

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.